In [7]:
# Tratamiento de datos
# ==============================================================================
import pandas as pd
import numpy as np

In [8]:
# Conexión a BD
# ==============================================================================
import pyodbc

In [9]:
# Database configuration and connection
# ==============================================================================
import getpass
import os

# Connection settings are read from the environment so that credentials are not
# stored in the notebook. The non-secret values fall back to the previous defaults.
direccion_servidor = os.environ.get('BDVIRUNET_SERVER', '192.168.1.32,55078')
nombre_bd = os.environ.get('BDVIRUNET_DATABASE', 'BDVirunet')
nombre_usuario = os.environ.get('BDVIRUNET_USER', 'sa')
# The password has no hard-coded fallback: it is prompted for when the variable is unset.
password = os.environ.get('BDVIRUNET_PASSWORD') or getpass.getpass('Password SQL Server: ')

try:
    conexion = pyodbc.connect(
        'DRIVER={ODBC Driver 17 for SQL Server};SERVER=' + direccion_servidor
        + ';DATABASE=' + nombre_bd
        + ';UID=' + nombre_usuario
        + ';PWD=' + password
    )
    print("Conexión exitosa")

except Exception as e:
    print("Ocurrió un error al conectar a SQL Server: ", e)
    # Stop here: the following cells use `conexion`, which would otherwise be
    # undefined and fail later with an unrelated NameError.
    raise

Conexión exitosa


In [10]:
# Date range (ISO strings) passed to the stored procedure and reused as the
# bounds of the weather filter.
dFechaInicio, dFechaFin = '2019-01-01', '2019-12-31'

In [11]:
# Column names given to the result set of STP_OBTENER_TOTALIZADO_PLAGAS, in order.
columnas_plagas = ['Evaluador', 'Fecha', 'Fundo', 'Filtrado', 'Turno', 'Lote', 'Grupo_Plaga',
                   'Plaga', 'Caracteristica', 'Suma', 'Grado', 'Promedio', 'Porcentaje']

try:
    with conexion.cursor() as cursor:
        consulta = "exec STP_OBTENER_TOTALIZADO_PLAGAS ?,?,?,?"
        cursor.execute(consulta, (dFechaInicio, dFechaFin, 1, 1))
        rows = cursor.fetchall()

    # Build the frame in one step; assigning df.loc[i] row by row re-allocates
    # the frame on every insertion. Column dtypes are inferred from the values.
    df = pd.DataFrame.from_records([tuple(row) for row in rows], columns=columnas_plagas)
except Exception as e:
    print(e)
    # Without `df` no later cell can run, so surface the failure here.
    raise

In [12]:
# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns

In [13]:
# Matplotlib configuration
# ==============================================================================
plt.rcParams['image.cmap'] = "bwr"
plt.rcParams['savefig.bbox'] = "tight"
# style.use() returns None, so the former `style.use(...) or plt.style.use(...)`
# applied the same style twice; a single call is enough.
style.use('ggplot')

# Warnings configuration
# ==============================================================================
import warnings
# NOTE(review): this silences every warning, including deprecation notices from
# pandas / scikit-learn; consider narrowing the filter.
warnings.filterwarnings('ignore')

In [14]:
# Distinct "Filtrado" and "Plaga" values present in the query result.
arrFiltrado = pd.unique(df["Filtrado"])
arrPlagas = pd.unique(df["Plaga"])

In [15]:
from pathlib import Path
from datetime import datetime

In [None]:
# For every Filtrado / Lote / Plaga / Caracteristica combination, plot the
# positive "Suma" values against "Fecha".
for filtrado in arrFiltrado:
    dfFiltrado = df[df["Filtrado"] == filtrado]
    arrLote = dfFiltrado["Lote"].unique()
    for lote in arrLote:
        dfLote = dfFiltrado[dfFiltrado['Lote'] == lote]
        arrPlagaxLote = dfLote['Plaga'].unique()
        for plaga in arrPlagaxLote:
            dfPlaga = dfLote[dfLote['Plaga'] == plaga]
            arrPlagaxCarac = dfPlaga['Caracteristica'].unique()

            # Output folder for this plaga; it only depends on the plaga, so it is
            # created once here instead of once per caracteristica.
            pathCarac = Path('img') / plaga.replace('/', '')
            pathCarac.mkdir(parents=True, exist_ok=True)

            for caracteristica in arrPlagaxCarac:
                dfCarac = dfPlaga[dfPlaga['Caracteristica'] == caracteristica]
                dfCarac = dfCarac.sort_values(by=['Fecha'])
                dfCarac = dfCarac[dfCarac["Suma"] > 0]

                print(dfCarac.shape[0])
                if dfCarac.empty:
                    # Nothing to draw: skip instead of emitting a blank figure.
                    continue

                title = "F{}- L{} - {} - {}".format(filtrado, lote, plaga, caracteristica)

                fig, ax = plt.subplots(figsize=(12, 6))
                ax.plot(dfCarac["Fecha"], dfCarac["Suma"])
                ax.set_title(title)
                # To persist the figure:
                # fig.savefig(pathCarac / '{}.png'.format(caracteristica.replace('/', '')))
                plt.show()
                # Release the figure; this loop can create a large number of them.
                plt.close(fig)
               

In [None]:
# Number of distinct "Lote" values in the query result.
len(df["Lote"].unique())

# Modelando

In [None]:
# Distinct "Filtrado" values available for modelling.
pd.unique(df["Filtrado"])

In [16]:
# Print, for each distinct "Filtrado", how many rows of df belong to it.
for filtrado in pd.unique(df["Filtrado"]):
    print("Filtrado: " + filtrado)
    filas_filtrado = df.loc[df["Filtrado"] == filtrado]
    nCantidadFiltrado = len(filas_filtrado)
    print("Cantidad: " + str(nCantidadFiltrado))
    print("**************************")

Filtrado: 10N
Cantidad: 1854
**************************
Filtrado: 11N
Cantidad: 2320
**************************
Filtrado: 13
Cantidad: 2032
**************************
Filtrado: 15
Cantidad: 511
**************************
Filtrado: 16
Cantidad: 666
**************************
Filtrado: 17
Cantidad: 1059
**************************
Filtrado: 18
Cantidad: 376
**************************
Filtrado: 20
Cantidad: 1000
**************************
Filtrado: 22
Cantidad: 617
**************************
Filtrado: 29
Cantidad: 3529
**************************
Filtrado: 30
Cantidad: 2962
**************************
Filtrado: 9N
Cantidad: 51
**************************


In [17]:
# Keep only the rows of the "Filtrado" values chosen for modelling.
filtradosEleg = ["17", "20", "10N"]
es_filtrado_elegido = df["Filtrado"].isin(filtradosEleg)
dfFilEleg = df.loc[es_filtrado_elegido]

In [None]:
# Row count of the selected "Filtrado" subset.
len(dfFilEleg)

## Filtrado 17, 20 y 10N

In [None]:
# Distinct pests observed in the selected "Filtrado" subset.
pd.unique(dfFilEleg["Plaga"])

### Plaga : OLYGONICHUS PUNICAE

In [18]:
# Rows of the selected subset that belong to the pest under study.
es_plaga_elegida = dfFilEleg["Plaga"] == "OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI"
dfPlg = dfFilEleg.loc[es_plaga_elegida]

#### Característica G° INFESTACIÓN (NINFAS-ADULTOS)

In [19]:
# Rows of the chosen pest for the characteristic under study.
es_caracteristica_elegida = dfPlg["Caracteristica"] == "G° INFESTACIÓN (NINFAS-ADULTOS)"
dfCarac = dfPlg.loc[es_caracteristica_elegida]

#### Todo el año

In [20]:
# Period under study (both bounds inclusive) and the observations inside it.
dtIniPoda = datetime(2019, 1, 1)
dtFinPoda = datetime(2019, 12, 31)
en_periodo = dfCarac["Fecha"].between(dtIniPoda.date(), dtFinPoda.date())
dfPoda = dfCarac[en_periodo]

In [21]:
# Load the "TABLA" sheet of the weather workbook. The context manager closes the
# workbook even when parsing fails (the explicit close() was skipped on error).
# NOTE(review): absolute local path; consider a configurable data directory.
with pd.ExcelFile('D:/Proyectos GitHub/Python/Datos/Datos Climaticos Viru Palto.xlsx') as data_weather:
    df_weather = data_weather.parse('TABLA')

In [22]:
# Keep the 2019 weather rows whose date lies inside the analysed range, then
# renumber the index from zero.
es_2019 = df_weather['Año'] == 2019
desde_inicio = df_weather['Fecha'] >= dFechaInicio
hasta_fin = df_weather['Fecha'] <= dFechaFin
df_weather = df_weather[es_2019 & desde_inicio & hasta_fin].reset_index(drop=True)

In [23]:
# Keep only the weather variables used in the analysis and give them short names.
# "Fecha" and "Semana" keep their original names.
renombres_clima = {
    'ET(mm).': 'ET',
    'Tº MIN/DIA (°C)': 'T_MIN',
    'Tº MAX/DIA (°C)': 'T_MAX',
    'Rad. Solar Prom.': 'RS_PROM',
    'HUMEDAD PROM.': 'HM_PROM',
    'T° Prom/Día (°C)': 'TMP_PROM',
}
columnas_clima = ['Fecha', *renombres_clima, 'Semana']
df_weather = df_weather[columnas_clima].rename(columns=renombres_clima)

In [None]:
# Correlation between weather variables and the infestation degree ("Grado") for
# every Filtrado / Turno / Lote / Plaga / Caracteristica group. Groups with a
# strong enough correlation are recorded in dfCorrexLote and plotted.
UMBRAL_CORRELACION = 0.60  # minimum |r| against "Grado" for a variable to count
MIN_OBSERVACIONES = 5      # a group needs more rows than this to be reported
columnas_modelo = ['ET', 'TMP_PROM', 'RS_PROM', 'HM_PROM', 'Grado']

arrPlagas = []
dfPlagasElegidas = dfPoda[dfPoda['Plaga'] == 'OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI']
dfFilter = dfPlagasElegidas
arrFiltrados = dfPlagasElegidas['Filtrado'].unique()
# Rows for dfCorrexLote are collected in a list and turned into a frame once at
# the end (DataFrame.append was removed in pandas 2.0 and was quadratic).
registrosCorrexLote = []
iContador = 0

# Weather table keyed by a datetime "FechaClima" column. It does not depend on
# the group, so it is prepared once outside the loops.
dfClima = df_weather.rename(columns={'Fecha': 'FechaClima'})
dfClima['FechaClima'] = pd.to_datetime(dfClima['FechaClima'])

for filtrado in arrFiltrados:
    dfFiltrado = dfFilter[dfFilter['Filtrado'] == filtrado]
    arrTurno = dfFiltrado['Turno'].unique()
    for turno in arrTurno:
        dfTurno = dfFiltrado[dfFiltrado['Turno'] == turno]
        arrLote = dfTurno['Lote'].unique()
        for lote in arrLote:
            dfLote = dfTurno[dfTurno['Lote'] == lote]
            arrPlagaxLote = dfLote['Plaga'].unique()
            for plaga in arrPlagaxLote:
                dfPlaga = dfLote[dfLote['Plaga'] == plaga]
                arrPlagaxCarac = dfPlaga['Caracteristica'].unique()
                for caracteristica in arrPlagaxCarac:
                    dfCarac = dfPlaga[dfPlaga['Caracteristica'] == caracteristica]
                    dfCarac = dfCarac.sort_values(by=['Fecha'])

                    # One row per date with the summed "Grado".
                    dfCarac = dfCarac.groupby(['Fecha','Fundo','Filtrado','Turno','Plaga','Caracteristica'])['Grado'].sum().reset_index()

                    # Pair each observation with the weather of the SAME date. The
                    # previous positional join silently misaligned rows whenever a
                    # date was missing from (or repeated in) either table.
                    df_union = (
                        dfCarac.assign(FechaClima=pd.to_datetime(dfCarac['Fecha']))
                               .merge(dfClima, on='FechaClima', how='inner')
                               .sort_values(by=['FechaClima'], kind='mergesort')
                               .reset_index(drop=True)
                    )

                    # Keep only the variables used in the analysis.
                    df_union = df_union[['Fecha','ET','TMP_PROM','RS_PROM','HM_PROM','Grado']].copy()
                    df_union['Grado'] = pd.to_numeric(df_union['Grado'])

                    # "Fecha" is excluded explicitly so corr() only sees numeric columns.
                    dfCorrelacion = df_union[columnas_modelo].corr().abs()

                    # The "Grado" row itself always passes (r = 1), so more than one
                    # row means at least one weather variable exceeds the threshold.
                    dfCorrelacion_index = dfCorrelacion[dfCorrelacion['Grado'] > UMBRAL_CORRELACION]
                    if dfCorrelacion_index.shape[0] <= 1 or df_union.shape[0] <= MIN_OBSERVACIONES:
                        continue

                    registrosCorrexLote.append({'Filtrado': filtrado,'Turno': turno,'Lote' : lote, 'Plaga': plaga,'Caracteristica': caracteristica})
                    arrPlagas.append(plaga + ' ' + caracteristica)
                    iContador = iContador + 1

                    sns.set()

                    print('------------------------------------------------------------')
                    print('Lugar: Filtrado {} - Turno {} - Lote {}'.format(filtrado,turno, lote))
                    print('Plaga: ', plaga)
                    print('Caracteristica: ', caracteristica)
                    print('------------------------------------------------------------')
                    print(df_union)

                    # Correlation heat map on its own figure.
                    figCalor, axCalor = plt.subplots()
                    sns.heatmap(dfCorrelacion, annot = True, ax = axCalor)
                    axCalor.set_title('{}'.format(caracteristica), fontdict = { 'fontsize' : '15'})

                    # Distribution plot for each numeric variable
                    # ==============================================================================
                    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(11, 5))
                    axes = axes.ravel()
                    columnas_numeric = df_union[columnas_modelo].select_dtypes(include=['float64', 'int']).columns
                    colores = list(plt.rcParams['axes.prop_cycle']) * 2

                    for i, colum in enumerate(columnas_numeric):
                        sns.histplot(
                            data    = df_union[columnas_modelo],
                            x       = colum,
                            stat    = "count",
                            kde     = True,
                            color   = colores[i]["color"],
                            line_kws= {'linewidth': 2},
                            alpha   = 0.3,
                            ax      = axes[i]
                        )
                        axes[i].set_title(colum, fontsize = 10, fontweight = "bold")
                        axes[i].tick_params(labelsize = 8)
                        axes[i].set_xlabel("")

                    # Remove the grid cells that received no plot.
                    for axVacio in axes[len(columnas_numeric):]:
                        fig.delaxes(axVacio)

                    fig.tight_layout()
                    fig.subplots_adjust(top = 0.9)
                    fig.suptitle('Distribución variables numéricas', fontsize = 10, fontweight = "bold")

                    plt.show()
                    # Release both figures; one pair is created per reported group.
                    plt.close(figCalor)
                    plt.close(fig)

dfCorrexLote = pd.DataFrame(registrosCorrexLote, columns=['Filtrado', 'Turno', 'Lote', 'Plaga','Caracteristica'])
                        
                        

In [24]:
# Preprocesado y modelado
# ==============================================================================
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Reload the "TABLA" sheet of the weather workbook. The context manager closes
# the workbook even when parsing fails (the explicit close() was skipped on error).
# NOTE(review): absolute local path; consider a configurable data directory.
with pd.ExcelFile('D:/Proyectos GitHub/Python/Datos/Datos Climaticos Viru Palto.xlsx') as data_weather:
    df_weather = data_weather.parse('TABLA')

In [None]:
# Keep the 2019 weather rows dated within the year, then renumber the index.
es_2019 = df_weather['Año'] == 2019
desde_inicio = df_weather['Fecha'] >= '2019-01-01'
hasta_fin = df_weather['Fecha'] <= '2019-12-31'
df_weather = df_weather[es_2019 & desde_inicio & hasta_fin].reset_index(drop=True)

In [None]:
# Keep the weather variables of interest and shorten the names of those used by
# the model. The min/max temperature columns keep their original names here.
columnas_clima = ['Fecha', 'ET(mm).', 'Tº MIN/DIA (°C)', 'Tº MAX/DIA (°C)',
                  'Rad. Solar Prom.', 'HUMEDAD PROM.', 'T° Prom/Día (°C)', 'Semana']
renombres_clima = {
    'ET(mm).': 'ET',
    'Rad. Solar Prom.': 'RS_PROM',
    'HUMEDAD PROM.': 'HM_PROM',
    'T° Prom/Día (°C)': 'TMP_PROM',
}
df_weather = df_weather[columnas_clima].rename(columns=renombres_clima)

In [None]:
import pickle

In [None]:
# For every Filtrado / Turno / Lote / Plaga / Caracteristica group, fit an OLS
# model of "Grado" on the weather variables that are strongly correlated with it,
# print the fit, plot residual diagnostics and report the test RMSE.
UMBRAL_CORRELACION = 0.90  # minimum |r| against "Grado" for a predictor
MIN_OBSERVACIONES = 9      # a group needs more rows than this to be modelled
columnas_modelo = ['ET', 'TMP_PROM', 'RS_PROM', 'HM_PROM', 'Grado']
separador = '**********************************************************************************************************************'

arrPlagas = []
dfFilter = dfPoda[dfPoda['Plaga'] == 'OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI']
arrFiltrados = dfFilter["Filtrado"].unique()

# Weather table keyed by a datetime "FechaClima" column. It does not depend on
# the group, so it is prepared once outside the loops.
dfClima = df_weather.rename(columns={'Fecha': 'FechaClima'})
dfClima['FechaClima'] = pd.to_datetime(dfClima['FechaClima'])

for filtrado in arrFiltrados:
    dfFiltrado = dfFilter[dfFilter['Filtrado'] == filtrado]
    arrTurno = dfFiltrado['Turno'].unique()
    for turno in arrTurno:
        dfTurno = dfFiltrado[dfFiltrado['Turno'] == turno]
        arrLote = dfTurno['Lote'].unique()
        for lote in arrLote:
            dfLote = dfTurno[dfTurno['Lote'] == lote]
            arrPlagaxLote = dfLote['Plaga'].unique()
            for plaga in arrPlagaxLote:
                dfPlaga = dfLote[dfLote['Plaga'] == plaga]
                arrPlagaxCarac = dfPlaga['Caracteristica'].unique()
                for caracteristica in arrPlagaxCarac:
                    dfCarac = dfPlaga[dfPlaga['Caracteristica'] == caracteristica]
                    dfCarac = dfCarac.sort_values(by=['Fecha'])

                    # One row per date with the summed "Grado".
                    dfCarac = dfCarac.groupby(['Fecha','Fundo','Filtrado','Turno','Plaga','Caracteristica'])['Grado'].sum().reset_index()

                    # Pair each observation with the weather of the SAME date. The
                    # previous positional join silently misaligned rows whenever a
                    # date was missing from (or repeated in) either table.
                    df_union = (
                        dfCarac.assign(FechaClima=pd.to_datetime(dfCarac['Fecha']))
                               .merge(dfClima, on='FechaClima', how='inner')
                               .sort_values(by=['FechaClima'], kind='mergesort')
                               .reset_index(drop=True)
                    )

                    df_union = df_union[['Fecha','ET','TMP_PROM','RS_PROM','HM_PROM','Grado']].copy()
                    df_union['Grado'] = pd.to_numeric(df_union['Grado'])

                    # "Fecha" is excluded explicitly so corr() only sees numeric columns.
                    dfCorrelacion = df_union[columnas_modelo].corr().abs()

                    # The "Grado" row itself always passes (r = 1), so more than one
                    # row means at least one weather variable exceeds the threshold.
                    dfCorrelacion_index = dfCorrelacion[dfCorrelacion['Grado'] > UMBRAL_CORRELACION]
                    if dfCorrelacion_index.shape[0] <= 1:
                        continue

                    # Predictors: every variable other than "Grado" above the threshold.
                    arrColumns = dfCorrelacion_index.columns
                    arrColumnsUnion = [
                        column for column in arrColumns
                        if column != 'Grado' and dfCorrelacion_index[column]['Grado'] > UMBRAL_CORRELACION
                    ]

                    # Building the linear regression
                    df_union = df_union.drop('Fecha' ,axis=1)
                    df_union = df_union.fillna(df_union.mean())

                    if df_union.shape[0] <= MIN_OBSERVACIONES:
                        continue

                    print(separador)
                    # Train / test split
                    # ==============================================================================
                    print('Lugar: Filtrado {} - Turno {} - Lote {}'.format(filtrado,turno, lote))
                    print('Plaga: ', plaga)
                    print('Caracteristica: ', caracteristica)
                    print('Número de datos: ',  df_union[arrColumnsUnion].shape[0])
                    print('Variables X:', arrColumnsUnion)
                    print("\n"*2)

                    X = df_union[arrColumnsUnion]
                    y = df_union['Grado']

                    X_train, X_test, y_train, y_test = train_test_split(
                                                            X,
                                                            y.values.reshape(-1,1),
                                                            train_size   = 0.8,
                                                            random_state = 1234,
                                                            shuffle      = True
                                                        )
                    # Model fitted from matrices, as in scikit-learn
                    # ==============================================================================
                    # A column of ones is added to the predictors for the model intercept.
                    X_train = sm.add_constant(X_train, prepend=True)
                    modelo = sm.OLS(endog=y_train, exog=X_train,)
                    modelo = modelo.fit()
                    print(modelo.summary())

                    print('\n Coeficientes')
                    print(modelo.params)

                    # Confidence intervals for the model coefficients
                    # ==============================================================================
                    intervalos_ci = modelo.conf_int(alpha=0.05)
                    intervalos_ci.columns = ['2.5%', '97.5%']

                    print('\nIntervalos de confianza: ')
                    print(intervalos_ci)

                    print('\n Diagnostico de residuos:')
                    # Residuals of the training predictions
                    # ==============================================================================
                    y_train = y_train.flatten()
                    prediccion_train = modelo.predict(exog = X_train)
                    residuos_train   = prediccion_train - y_train

                    # Plots
                    # ==============================================================================
                    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(9, 8))

                    axes[0, 0].scatter(y_train, prediccion_train, edgecolors=(0, 0, 0), alpha = 0.4)
                    # Line style and colour are given once; combining the 'k--' format
                    # string with color= defined the colour twice.
                    axes[0, 0].plot([y_train.min(), y_train.max()], [y_train.min(), y_train.max()],
                                    linestyle = '--', color = 'black', lw=2)
                    axes[0, 0].set_title('Valor predicho vs valor real', fontsize = 10, fontweight = "bold")
                    axes[0, 0].set_xlabel('Real')
                    axes[0, 0].set_ylabel('Predicción')
                    axes[0, 0].tick_params(labelsize = 7)

                    axes[0, 1].scatter(list(range(len(y_train))), residuos_train,
                                       edgecolors=(0, 0, 0), alpha = 0.4)
                    axes[0, 1].axhline(y = 0, linestyle = '--', color = 'black', lw=2)
                    axes[0, 1].set_title('Residuos del modelo', fontsize = 10, fontweight = "bold")
                    axes[0, 1].set_xlabel('id')
                    axes[0, 1].set_ylabel('Residuo')
                    axes[0, 1].tick_params(labelsize = 7)

                    sns.histplot(
                        data    = residuos_train,
                        stat    = "density",
                        kde     = True,
                        line_kws= {'linewidth': 1},
                        color   = "firebrick",
                        alpha   = 0.3,
                        ax      = axes[1, 0]
                    )

                    axes[1, 0].set_title('Distribución residuos del modelo', fontsize = 10,
                                         fontweight = "bold")
                    axes[1, 0].set_xlabel("Residuo")
                    axes[1, 0].tick_params(labelsize = 7)

                    sm.qqplot(
                        residuos_train,
                        fit   = True,
                        line  = 'q',
                        ax    = axes[1, 1],
                        color = 'firebrick',
                        alpha = 0.4,
                        lw    = 2
                    )
                    axes[1, 1].set_title('Q-Q residuos del modelo', fontsize = 10, fontweight = "bold")
                    axes[1, 1].tick_params(labelsize = 7)

                    axes[2, 0].scatter(prediccion_train, residuos_train,
                                       edgecolors=(0, 0, 0), alpha = 0.4)
                    axes[2, 0].axhline(y = 0, linestyle = '--', color = 'black', lw=2)
                    axes[2, 0].set_title('Residuos del modelo vs predicción', fontsize = 10, fontweight = "bold")
                    axes[2, 0].set_xlabel('Predicción')
                    axes[2, 0].set_ylabel('Residuo')
                    axes[2, 0].tick_params(labelsize = 7)

                    # The unused grid cell is removed.
                    fig.delaxes(axes[2,1])

                    fig.tight_layout()
                    fig.subplots_adjust(top=0.9)
                    fig.suptitle('Diagnóstico residuos', fontsize = 12, fontweight = "bold")
                    plt.show()
                    # Release the figure; one is created per modelled group.
                    plt.close(fig)

                    if lote == "2016":
                        filename='model_olygonichus_adultos.pkl'
                        # The context manager flushes and closes the file handle.
                        with open(filename, 'wb') as archivo_modelo:
                            pickle.dump(modelo, archivo_modelo)

                    # Predictions with confidence interval
                    # ==============================================================================
                    predicciones = modelo.get_prediction(exog = X_train).summary_frame(alpha=0.05)
                    print('\n Predicciones:')
                    print(predicciones.head(4))

                    # Test error of the model
                    # ==============================================================================
                    # has_constant='add' forces the intercept column: with the default
                    # ('skip') it is omitted when a predictor happens to be constant in
                    # the small test split, and predict() then fails on the shape.
                    X_test = sm.add_constant(X_test, prepend=True, has_constant='add')
                    predicciones = modelo.predict(exog = X_test)
                    # RMSE computed explicitly; the `squared` argument of
                    # mean_squared_error is deprecated in recent scikit-learn.
                    rmse = np.sqrt(mean_squared_error(
                            y_true  = y_test,
                            y_pred  = predicciones
                           ))
                    print("")
                    print(f"El error (rmse) de test es: {rmse}")

                    print(X_test)
                    print(y_test)

                    print("\n"*2)
                    print(separador)
                            

In [26]:
# Preview the first five observations of the analysed period.
dfPoda.iloc[:5]

Unnamed: 0,Evaluador,Fecha,Fundo,Filtrado,Turno,Lote,Grupo_Plaga,Plaga,Caracteristica,Suma,Grado,Promedio,Porcentaje
3,hhonores,2019-11-01,VIRU - PALTO,10N,F10N,1031,HOJAS,"OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI",G° INFESTACIÓN (NINFAS-ADULTOS),29,1,--,--
4,tvelasquez,2019-09-10,VIRU - PALTO,10N,F10N,1010,HOJAS,"OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI",G° INFESTACIÓN (NINFAS-ADULTOS),25,1,--,--
5,acampos,2019-07-03,VIRU - PALTO,10N,F10N,1012,HOJAS,"OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI",G° INFESTACIÓN (NINFAS-ADULTOS),27,1,--,--
7,mpina,2019-09-26,VIRU - PALTO,10N,F10N,1009,HOJAS,"OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI",G° INFESTACIÓN (NINFAS-ADULTOS),44,2,--,--
8,fchavez,2019-12-19,VIRU - PALTO,10N,F10N,1012,HOJAS,"OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI",G° INFESTACIÓN (NINFAS-ADULTOS),87,4,--,--


In [27]:
# Observations of the single lot used to train the exported model.
es_filtrado_20 = dfPoda["Filtrado"] == "20"
es_turno_f20 = dfPoda["Turno"] == "F20"
es_lote_2016 = dfPoda["Lote"] == "2016"
dfxModelado = dfPoda[es_filtrado_20 & es_turno_f20 & es_lote_2016]

In [28]:
# Preview the first three observations of the modelled lot.
dfxModelado.iloc[:3]

Unnamed: 0,Evaluador,Fecha,Fundo,Filtrado,Turno,Lote,Grupo_Plaga,Plaga,Caracteristica,Suma,Grado,Promedio,Porcentaje
9061,mmarquina,2019-12-26,VIRU - PALTO,20,F20,2016,HOJAS,"OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI",G° INFESTACIÓN (NINFAS-ADULTOS),73,3,--,--
9156,kobeso,2019-11-01,VIRU - PALTO,20,F20,2016,HOJAS,"OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI",G° INFESTACIÓN (NINFAS-ADULTOS),26,1,--,--
9284,lvillanueva,2019-11-21,VIRU - PALTO,20,F20,2016,HOJAS,"OLYGONICHUS PUNICAE, OLIGONYCHUS YOTHERSI",G° INFESTACIÓN (NINFAS-ADULTOS),32,1,--,--


In [29]:
# Modelling table for the selected lot: daily "Grado" totals paired with the
# weather readings of the same date, plus the absolute correlation matrix.
dfCarac = dfxModelado.sort_values(by=['Fecha'])

dfCarac = dfCarac.groupby(['Fecha','Fundo','Filtrado','Turno','Plaga','Caracteristica'])['Grado'].sum().reset_index()

# Weather rows for the observed dates, keyed by a datetime "FechaClima" column.
fechas_observadas = pd.to_datetime(dfCarac['Fecha'])
fl_df_weather = df_weather.rename(columns = {'Fecha' : 'FechaClima'})
fl_df_weather['FechaClima'] = pd.to_datetime(fl_df_weather['FechaClima'])
fl_df_weather = fl_df_weather[fl_df_weather['FechaClima'].isin(fechas_observadas)]
fl_df_weather = fl_df_weather.sort_values(by=['FechaClima']).reset_index(drop = True)

# Pair each observation with the weather of the SAME date. The previous
# positional join silently misaligned rows whenever a date was missing from
# (or repeated in) either table.
df_union = (
    dfCarac.assign(FechaClima=fechas_observadas)
           .merge(fl_df_weather, on='FechaClima', how='inner')
           .sort_values(by=['FechaClima'], kind='mergesort')
           .reset_index(drop=True)
)

df_union = df_union[['Fecha','ET','TMP_PROM','RS_PROM','HM_PROM','Grado']].copy()

df_union['Grado'] = pd.to_numeric(df_union['Grado'])

# "Fecha" is excluded explicitly so corr() only sees numeric columns.
dfCorrelacion = df_union[['ET','TMP_PROM','RS_PROM','HM_PROM','Grado']].corr().abs()

In [30]:
dfCorrelacion

Unnamed: 0,ET,TMP_PROM,RS_PROM,HM_PROM,Grado
ET,1.0,0.197164,0.990442,0.100725,0.15256
TMP_PROM,0.197164,1.0,0.110758,0.402649,0.922621
RS_PROM,0.990442,0.110758,1.0,0.052159,0.071982
HM_PROM,0.100725,0.402649,0.052159,1.0,0.361936
Grado,0.15256,0.922621,0.071982,0.361936,1.0


In [34]:
# Fit a scikit-learn linear regression of "Grado" on the weather variables whose
# absolute correlation with it exceeds 0.90, for the selected lot.
dfCorrelacion_index = dfCorrelacion[dfCorrelacion['Grado'] > 0.90]
arrColumns = dfCorrelacion_index.columns
# Predictors: every variable other than "Grado" above the threshold. Plain
# booleans are combined with `and`, not the bitwise `&`.
arrColumnsUnion = [
    column for column in arrColumns
    if column != 'Grado' and dfCorrelacion_index[column]['Grado'] > 0.90
]

# Building the linear regression
# Missing values are replaced by the column mean. numeric_only=True keeps the
# "Fecha" column (dates) out of the mean, which otherwise raises on pandas >= 2.
df_union = df_union.fillna(df_union.mean(numeric_only=True))


if df_union[arrColumnsUnion].shape[0] > 9:
    print('**********************************************************************************************************************')
    # Train / test split
    # ==============================================================================
    print('Lugar: Filtrado {} - Turno {} - Lote {}'.format("20","F20", "2016"))
    print('Número de datos: ',  df_union[arrColumnsUnion].shape[0])
    print('Variables X:', arrColumnsUnion)
    print("\n"*2)

    X = df_union[arrColumnsUnion]
    y = df_union['Grado']

    X_train, X_test, y_train, y_test = train_test_split(
                                            X,
                                            y.values.reshape(-1,1),
                                            train_size   = 0.8,
                                            random_state = 1234,
                                            shuffle      = True
                                        )

    modelo = LinearRegression()
    modelo = modelo.fit(X_train, y_train)
 



**********************************************************************************************************************
Lugar: Filtrado 20 - Turno F20 - Lote 2016
Número de datos:  10
Variables X: ['TMP_PROM']





In [35]:
y_test

array([[2],
       [1]], dtype=int64)

In [36]:
X_test

Unnamed: 0,TMP_PROM
7,20.645833
2,19.766667


In [2]:
import pickle

In [37]:
# Persist the fitted model. The context manager flushes and closes the file,
# which the bare open() passed to pickle.dump never did.
filename='model_olygonichus_adultos.pkl'
with open(filename, 'wb') as archivo_modelo:
    pickle.dump(modelo, archivo_modelo)

In [38]:
# Reload the persisted model and predict on the held-out rows.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open(filename, 'rb') as archivo_modelo:
    loaded_model = pickle.load(archivo_modelo)
result = loaded_model.predict(X_test)

In [39]:
result

array([[2.1160841 ],
       [1.52199657]])

In [None]:
# Convert the raw (string) inputs into floats, as a web form would deliver them.
to_predict_list = [float(valor) for valor in ["20"]]
to_predict_list

In [None]:
# Shape the inputs as a single-column 2-D array (one row per value).
to_predict = np.reshape(np.array(to_predict_list), (-1, 1))
to_predict

In [None]:
# Predict with the reloaded model and present the result as a column vector.
np.reshape(loaded_model.predict(to_predict), (-1, 1))

In [41]:
# Predict "Grado" for a single input value of 20 using the model already held in
# `loaded_model` (reload it from "model_olygonichus_adultos.pkl" after a kernel restart).
loaded_model.predict([[20]])

array([[1.67966909]])

In [5]:
import os

# Load the object pickled in `target`. `scores` stays an empty dict when the
# file is missing or empty; os.path.getsize alone raised FileNotFoundError when
# the file did not exist.
scores = {}
target = "model_olygonichus_adultos.pkl"
# NOTE: pickle.load can execute arbitrary code; only load files you created.
if os.path.isfile(target) and os.path.getsize(target) > 0:
    with open(target, "rb") as f:
        scores = pickle.load(f)

In [6]:
scores

{}