# Imports

In [77]:
import numpy
from numpy import arange
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from joblib import dump
from joblib import load
import pickle

# Cargar dataset

In [78]:
# Read the reference dataset and the validation dataset (both ';'-separated CSVs).
# NOTE(review): these are absolute local paths; the notebook will only run on a
# machine with this exact directory layout.
ruta_datos = '/home/user/machinelearning/imida/data/AL41.csv'
ruta_validacion = '/home/user/machinelearning/imida/data/AL41_validation.csv'

df = pd.read_csv(ruta_datos, sep=';')

# The validation file is read with the column names taken from `df`.
dataset_validacion = pd.read_csv(
    ruta_validacion,
    sep=';',
    names=df.columns,
)
# Discard rows with missing values before any further processing.
dataset_validacion.dropna(inplace=True)
dataset_validacion.head()

Unnamed: 0,CODEST,FECHA,VVMED,VVMAX,RVIENTO,DVMED,TMIN,TMAX,TMED,TMINABS,...,HRMAXABS,RADMED,RADMAX,HSOL,PREC,HORAS0,HORAS7,DEWPT,DPV,ETO_PM_FAO
0,AL41,01/01/2024,32,4822,2799,5983,363,1767,975,2563,...,838,10232,5143,6,0,0,8,226,53,713398532
1,AL41,02/01/2024,9,4665,755,16879,74,1811,764,-196,...,85,11344,7971,8,0,0,13,76,47,422334652
2,AL41,03/01/2024,15,3744,1315,16893,192,2151,958,92,...,834,12381,5297,8,0,0,9,79,67,548790064
3,AL41,04/01/2024,6,4292,483,3282,647,1734,1218,5732,...,867,5155,4115,6,0,0,2,713,43,589718576
4,AL41,05/01/2024,197,945,1704,26344,752,1655,1227,6798,...,868,8973,7046,7,0,0,0,253,67,1843253317


In [79]:
# Show the row labelled 60 to inspect the raw values: at this point the numeric
# fields are still text using ',' as the decimal separator.
dataset_validacion.loc[60]

CODEST              AL41
FECHA         01/03/2024
VVMED               2,41
VVMAX               12,2
RVIENTO           248,47
DVMED             175,33
TMIN                24,8
TMAX               40,28
TMED               34,31
TMINABS             23,4
TMAXABS            41,02
HRMIN              25,46
HRMAX              65,39
HRMED              55,72
HRMINABS            21,3
HRMAXABS           67,56
RADMED            333,91
RADMAX             998,8
HSOL                  12
PREC                   0
HORAS0                 0
HORAS7                 0
DEWPT              18,03
DPV                 2,98
ETO_PM_FAO     8,8182404
Name: 60, dtype: object

# Pasar a floats

In [80]:
# Switch the decimal separator from ',' to '.' in every text cell, then turn the
# whole frame into strings so each column can be handled uniformly below.
dataset_validacion.replace(',', '.', regex=True, inplace=True)
dataset_validacion = dataset_validacion.astype(str)

# Convert a column to float only when *every* value in it parses as a number.
# The previous check ("some value contains '.' or is all digits") was fragile:
# a single dotted non-number made the cast crash, and columns holding only
# negative integers were never converted. Parsing the whole column at once is
# both stricter and vectorized.
for variable in dataset_validacion.columns:
    try:
        dataset_validacion[variable] = pd.to_numeric(dataset_validacion[variable]).astype(float)
    except (ValueError, TypeError):
        # Not a numeric column (e.g. the station code or the date): keep it as strings.
        continue

# Columnas para fecha

In [81]:
# Parse the day/month/year text dates once and store them as real datetimes.
fechas = pd.to_datetime(dataset_validacion['FECHA'], format="%d/%m/%Y")
dataset_validacion['FECHA'] = fechas

# Derive one integer column per date component (year, day, month - in that order).
componentes = (('AÑO', 'year'), ('DIA', 'day'), ('MES', 'month'))
for columna, atributo in componentes:
    dataset_validacion[columna] = getattr(fechas.dt, atributo)

# Show the resulting column types.
print(dataset_validacion.dtypes)

CODEST                object
FECHA         datetime64[ns]
VVMED                float64
VVMAX                float64
RVIENTO              float64
DVMED                float64
TMIN                 float64
TMAX                 float64
TMED                 float64
TMINABS              float64
TMAXABS              float64
HRMIN                float64
HRMAX                float64
HRMED                float64
HRMINABS             float64
HRMAXABS             float64
RADMED               float64
RADMAX               float64
HSOL                 float64
PREC                 float64
HORAS0               float64
HORAS7               float64
DEWPT                float64
DPV                  float64
ETO_PM_FAO           float64
AÑO                    int32
DIA                    int32
MES                    int32
dtype: object


# Cargar scaler y modelo

## Scaler

In [82]:
# Load the scaler object that is later used to transform the feature matrix.
# The `with` block guarantees the file handle is closed even if unpickling fails.
# NOTE(review): pickle.load can execute arbitrary code - only load files that
# come from a trusted source.
with open('Standard_scaler.sav', 'rb') as file:
    scaler = pickle.load(file)

## Model

In [83]:
# Deserialize the model used for the predictions below, via joblib's `load`.
ruta_modelo = 'Standard_model.sav'
model = load(ruta_modelo)

# Agregar columna estaciones

In [84]:
# (month, day) on which each season starts, in calendar order. Dates before the
# first entry (1 January - 20 March) belong to the winter that began in December.
INICIO_ESTACIONES = (
    ((3, 21), 'PRIMAVERA'),
    ((6, 21), 'VERANO'),
    ((9, 23), 'OTOÑO'),
    ((12, 22), 'INVIERNO'),
)


def estacion_de(mes, dia):
    """Return the season label for a calendar date.

    Args:
        mes: month number (1-12).
        dia: day of the month.

    Returns:
        One of 'INVIERNO', 'PRIMAVERA', 'VERANO' or 'OTOÑO'.
    """
    estacion = 'INVIERNO'
    for inicio, nombre in INICIO_ESTACIONES:
        # Tuples compare month first, then day; the last start date reached wins.
        if (mes, dia) >= inicio:
            estacion = nombre
    return estacion


fechas = dataset_validacion['FECHA']
# A missing date cannot be labelled; fail loudly instead of mislabelling it.
if fechas.isna().any():
    raise ValueError('FECHA contains missing dates; cannot assign a season')

# Read month and day straight from the datetime accessor instead of converting
# every value to text and slicing it by position.
estaciones = [estacion_de(mes, dia) for mes, dia in zip(fechas.dt.month, fechas.dt.day)]
dataset_validacion['ESTACION'] = estaciones
dataset_validacion.tail(5)

Unnamed: 0,CODEST,FECHA,VVMED,VVMAX,RVIENTO,DVMED,TMIN,TMAX,TMED,TMINABS,...,PREC,HORAS0,HORAS7,DEWPT,DPV,ETO_PM_FAO,AÑO,DIA,MES,ESTACION
162,AL41,2024-06-11,1.48,6.017,127.67,333.79,16.46,24.0,19.75,16.16,...,2.3,0.0,0.0,16.35,0.46,2.777017,2024,11,6,PRIMAVERA
163,AL41,2024-06-12,2.82,9.76,243.99,306.15,14.98,22.59,19.38,14.2,...,12.5,0.0,0.0,15.45,0.51,3.443402,2024,12,6,PRIMAVERA
164,AL41,2024-06-13,2.09,8.33,180.96,343.45,13.33,25.8,19.36,12.52,...,0.2,0.0,0.0,14.53,0.66,4.947626,2024,13,6,PRIMAVERA
165,AL41,2024-06-14,2.02,8.43,174.91,320.7,12.74,33.07,23.4,12.02,...,0.0,0.0,0.0,9.28,1.88,7.061606,2024,14,6,PRIMAVERA
166,AL41,2024-06-15,2.5,11.9,216.15,334.7,17.93,33.68,24.96,17.4,...,4.0,0.0,0.0,12.55,1.81,7.290027,2024,15,6,PRIMAVERA


# Dividir dataset

## Datos

In [85]:
# Keep only the numeric columns (floats plus the integer date components) and
# expose them as a plain NumPy matrix.
tipos_numericos = ['float64', 'int64', 'int32']
dataset_reducido = dataset_validacion.select_dtypes(include=tipos_numericos)
X = dataset_reducido.to_numpy()
print(X)

[[3.2000e-01 4.8220e+00 2.7990e+01 ... 2.0240e+03 1.0000e+00 1.0000e+00]
 [9.0000e-02 4.6650e+00 7.5500e+00 ... 2.0240e+03 2.0000e+00 1.0000e+00]
 [1.5000e-01 3.7440e+00 1.3150e+01 ... 2.0240e+03 3.0000e+00 1.0000e+00]
 ...
 [2.0900e+00 8.3300e+00 1.8096e+02 ... 2.0240e+03 1.3000e+01 6.0000e+00]
 [2.0200e+00 8.4300e+00 1.7491e+02 ... 2.0240e+03 1.4000e+01 6.0000e+00]
 [2.5000e+00 1.1900e+01 2.1615e+02 ... 2.0240e+03 1.5000e+01 6.0000e+00]]


## Target

In [86]:
# The target is the season label of each row, as a NumPy array.
columna_objetivo = dataset_validacion['ESTACION']
Y = columna_objetivo.to_numpy()
print(Y)

['INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO' 'INVIERNO'
 'PRIMAVERA' 'PRIMAVERA' 'PRIMAVERA' 'PRIMAVERA' 'PRIMAVERA' 'PRIMAVERA'
 'PRIMAVERA' 'PRIMAVERA' 'PRIMAVERA' 'PRIMAVERA' 'PRIMAV

# Escalar datos

In [87]:
# Apply the previously loaded scaler to the features (transform only, no refit).
X_estandarizado = scaler.transform(X)
print(X_estandarizado)

[[-2.24034202 -1.70179668 -2.23578704 ...  2.81027435 -1.67163695
  -1.62100904]
 [-2.52217139 -1.7633716  -2.52572474 ...  2.81027435 -1.55819941
  -1.62100904]
 [-2.44865069 -2.12458499 -2.44628975 ...  2.81027435 -1.44476187
  -1.62100904]
 ...
 [-0.07148124 -0.32596976 -0.06593529 ...  2.81027435 -0.31038649
  -0.17009454]
 [-0.1572554  -0.28675007 -0.15175345 ...  2.81027435 -0.19694895
  -0.17009454]
 [ 0.43091024  1.07417336  0.43322848 ...  2.81027435 -0.08351142
  -0.17009454]]


# Validar

In [88]:
# Predict the season for every validation row and report the metrics.
# NOTE(review): X includes the DIA and MES columns while Y (ESTACION) is derived
# from the same date, so a perfect score may simply reflect label leakage - confirm.
predictions = model.predict(X_estandarizado)
for metrica in (accuracy_score, confusion_matrix, classification_report):
    print(metrica(Y, predictions))

1.0
[[78  0]
 [ 0 87]]
              precision    recall  f1-score   support

    INVIERNO       1.00      1.00      1.00        78
   PRIMAVERA       1.00      1.00      1.00        87

    accuracy                           1.00       165
   macro avg       1.00      1.00      1.00       165
weighted avg       1.00      1.00      1.00       165

