In [1]:
# Tratamiento de datos
# ==============================================================================
import pandas as pd
import numpy as np

# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns

# Preprocesado y modelado
# ==============================================================================
from scipy.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm
from scipy import stats

# Configuración matplotlib
# ==============================================================================
plt.rcParams['image.cmap'] = "bwr"
#plt.rcParams['figure.dpi'] = "100"
plt.rcParams['savefig.bbox'] = "tight"
style.use('ggplot') or plt.style.use('ggplot')

# Configuración warnings
# ==============================================================================
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Conexión a BD
# ==============================================================================
import pyodbc

In [3]:
# Configuración de BD y conexión
# ==============================================================================
direccion_servidor = '192.168.1.32,55078'
nombre_bd = 'BDVirunet'
nombre_usuario = 'sa'
password = '123456'

try:
    conexion = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=' + 
                              direccion_servidor+';DATABASE='+nombre_bd+';UID='+nombre_usuario+';PWD=' + password)
    print("Conexión exitosa")
    
except Exception as e:
    print("Ocurrió un error al conectar a SQL Server: ", e)

Conexión exitosa


In [4]:
dFechaInicio = '2020-01-01'
dFechaFin = '2020-12-31'

In [5]:
try:
    with conexion.cursor() as cursor:
            consulta = "exec STP_OBTENER_TOTALIZADO_PLAGAS ?,?,?,?"
            cursor.execute(consulta, (dFechaInicio, dFechaFin, 1,0))

            rows = cursor.fetchall()
            
            cols = []
            
            for i,_ in enumerate(cursor.description):
                cols.append(cursor.description[i][0])

            pd.DataFrame(np.array(rows), columns = cols)
            
            df = pd.DataFrame(columns =['Evaluador','Fecha','Fundo','Filtrado','Turno','Lote','Grupo_Plaga','Plaga','Caracteristica','Suma','Grado','Promedio','Porcentaje'] )
            for i in range(len(rows)):
                df.loc[i] = list(rows[i])
except Exception as e: 
    print(e)

In [47]:
dfPlaga =  df[df["Plaga"] == "DAGBERTUS MINENSIS, DAGBERTUS PERUANUS"]

In [49]:
dfPlaga["Lote"].unique().shape[0]

32

In [17]:
df17 = dfPlaga[dfPlaga["Filtrado"] == "17"]

In [18]:
df17['Semana'] = pd.to_datetime(df17['Fecha'], format='%Y-%m-%d').dt.weekofyear

In [40]:
for lote in df17["Lote"].unique():
    print("Lote "+ str(lote))
    print(df17[df17["Lote"] == lote].shape[0])
    print("********************")

Lote 1704
3
********************
Lote 1708
5
********************
Lote 1713
7
********************
Lote 1716
7
********************
Lote 1721
5
********************
Lote 1720
4
********************
Lote 1722
4
********************
Lote 1705
6
********************
Lote 1706
5
********************
Lote 1715
5
********************
Lote 1717
9
********************
Lote 1702
3
********************
Lote 1703
5
********************
Lote 1719
5
********************
Lote 1718
3
********************
Lote 1723
4
********************
Lote 1711
5
********************
Lote 1714
3
********************
Lote 1709
3
********************
Lote 1710
2
********************
Lote 1712
2
********************
Lote 1724
3
********************
Lote 1701
2
********************
Lote 1707
4
********************


In [44]:
df17[df17["Lote"] == "1717"].sort_values(by=['Semana'])

Unnamed: 0,Evaluador,Fecha,Fundo,Filtrado,Turno,Lote,Grupo_Plaga,Plaga,Caracteristica,Suma,Grado,Promedio,Porcentaje,Semana
2750,karaujo,2020-06-12,VIRU - PALTO,17,F17,1717,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,1,--,--,4,24
4729,karaujo,2020-07-02,VIRU - PALTO,17,F17,1717,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,1,--,--,4,27
4444,lvillanueva,2020-08-01,VIRU - PALTO,17,F17,1717,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,1,--,--,4,31
3944,yaira,2020-09-15,VIRU - PALTO,17,F17,1717,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,1,--,--,4,38
5731,kobeso,2020-09-21,VIRU - PALTO,17,F17,1717,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,2,--,--,8,39
5538,lvillanueva,2020-10-02,VIRU - PALTO,17,F17,1717,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,1,--,--,4,40
5591,karaujo,2020-10-06,VIRU - PALTO,17,F17,1717,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,2,--,--,8,41
5201,lvillanueva,2020-10-30,VIRU - PALTO,17,F17,1717,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,4,--,--,16,44
5022,lvillanueva,2020-11-05,VIRU - PALTO,17,F17,1717,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,5,--,--,20,45


In [50]:
dfPlaga.head()

Unnamed: 0,Evaluador,Fecha,Fundo,Filtrado,Turno,Lote,Grupo_Plaga,Plaga,Caracteristica,Suma,Grado,Promedio,Porcentaje
139,fchavez,2020-11-04,VIRU - PALTO,10,T01,1053,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,12,--,--,48
168,lminchola,2020-11-23,VIRU - PALTO,10,T01,1053,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,6,--,--,24
880,fchavez,2020-07-17,VIRU - PALTO,10N,F10N,1012,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,1,--,--,4
900,eestrada,2020-07-04,VIRU - PALTO,10N,F10N,1012,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,1,--,--,4
1067,naguilar,2020-10-21,VIRU - PALTO,10N,F10N,1012,RACIMOS FLORIALES,"DAGBERTUS MINENSIS, DAGBERTUS PERUANUS",N° INFLORESC. INFESTADAS,1,--,--,4


In [81]:
data_weather = pd.ExcelFile('D:/Proyectos GitHub/Python/Datos/Datos Climaticos Viru Palto.xlsx')
df_weather = data_weather.parse('TABLA')  
data_weather.close()

In [82]:
df_weather = df_weather[df_weather['Año'] == 2020]
df_weather = df_weather[(df_weather['Fecha'] >= dFechaInicio) & (df_weather['Fecha'] <= dFechaFin)]
df_weather.reset_index(drop = True, inplace = True)

In [83]:
df_weather = df_weather[['Fecha','ET(mm).','Tº MIN/DIA (°C)','Tº MAX/DIA (°C)','Rad. Solar Prom.','HUMEDAD PROM.', 'T° Prom/Día (°C)','Semana']]
df_weather = df_weather.rename(columns = {'ET(mm).' : 'ET', 
                                      'Tº MIN/DIA (°C)' : 'T_MIN',
                                     'Tº MAX/DIA (°C)': 'T_MAX',
                                     'Rad. Solar Prom.' : 'RS_PROM',
                                     'HUMEDAD PROM.': 'HM_PROM',
                                      'T° Prom/Día (°C)': 'TMP_PROM',
                                         'Semana': 'Semana'})

In [51]:
dfPlaga['Semana'] = pd.to_datetime(dfPlaga['Fecha'], format='%Y-%m-%d').dt.weekofyear

In [96]:
dfPlaga.shape[0]

139

In [110]:
dfFilterPlaga = dfPlaga[["Filtrado","Lote","Fecha","Semana","Porcentaje"]]

In [111]:
dfFilterPlaga.head()

Unnamed: 0,Filtrado,Lote,Fecha,Semana,Porcentaje
139,10,1053,2020-11-04,45,48
168,10,1053,2020-11-23,48,24
880,10N,1012,2020-07-17,29,4
900,10N,1012,2020-07-04,27,4
1067,10N,1012,2020-10-21,43,4


In [112]:
dfFilterPlaga["Filtrado"].unique()

array(['10', '10N', '11N', '17', '20'], dtype=object)

In [113]:
dfPlagaAll = pd.DataFrame(columns = ['FechaClima','ET','T_MIN','T_MAX','RS_PROM','HM_PROM','TMP_PROM','Semana','Filtrado','Lote','Fecha','Porcentaje'])

In [124]:
for filtrado in dfFilterPlaga["Filtrado"].unique():
    dfFiltrado = dfFilterPlaga[dfFilterPlaga["Filtrado"] == filtrado]
    arrLote = dfFiltrado["Lote"].unique()
    for lote in arrLote:
        print(lote)
        print(arrLote.shape[0])
        dfPlagaFiltrado = dfFiltrado[dfFiltrado["Lote"] == lote].sort_values(by=['Semana'])
        dfPlagaFiltrado.reset_index(drop = True, inplace = True)        
        fl_df_weather = df_weather[df_weather['Fecha'].isin(dfPlagaFiltrado['Fecha'])]
        fl_df_weather = fl_df_weather.sort_values(by=['Fecha'])
        fl_df_weather.reset_index(drop = True, inplace = True)
        fl_df_weather = fl_df_weather.rename(columns = {'Fecha' : 'FechaClima'})
        
        dfPlagaFiltrado = dfPlagaFiltrado.drop('Semana' ,axis=1)
        
        df_union = fl_df_weather.join(dfPlagaFiltrado)  
        df_union['Semana'] = pd.to_datetime(df_union['Fecha'], format='%Y-%m-%d').dt.weekofyear
       
        dfPlagaAll = pd.concat([dfPlagaAll, df_union])
       

1053
1
1012
1
1160
1
1704
24
1708
24
1713
24
1716
24
1721
24
1720
24
1722
24
1705
24
1706
24
1715
24
1717
24
1702
24
1703
24
1719
24
1718
24
1723
24
1711
24
1714
24
1709
24
1710
24
1712
24
1724
24
1701
24
1707
24
2020
5
2021
5
2025
5
2016
5
2019
5


In [119]:
dfPlagaAll.head()

Unnamed: 0,FechaClima,ET,T_MIN,T_MAX,RS_PROM,HM_PROM,TMP_PROM,Semana,Filtrado,Lote,Fecha,Porcentaje
0,2020-11-04,3.1,10.9,22.1,215.1,84.0,16.210417,45,10,1053,2020-11-04,48
1,2020-11-23,3.9,13.7,23.4,255.9,82.0,18.297917,48,10,1053,2020-11-23,24
0,2020-07-04,2.6,11.7,22.1,183.0,88.0,16.235417,27,10N,1012,2020-07-04,4
1,2020-07-17,1.8,14.8,20.1,131.6,88.0,16.15625,29,10N,1012,2020-07-17,4
2,2020-10-21,2.5,15.2,22.2,200.2,84.0,17.608889,43,10N,1012,2020-10-21,4


In [105]:
#dfPlagaAll = dfPlagaAll.drop_duplicates()

In [120]:
dfPlagaAll.sort_values(by=['Filtrado','Lote','Semana'])

Unnamed: 0,FechaClima,ET,T_MIN,T_MAX,RS_PROM,HM_PROM,TMP_PROM,Semana,Filtrado,Lote,Fecha,Porcentaje
0,2020-11-04,3.1,10.9,22.1,215.1,84.0,16.210417,45,10,1053,2020-11-04,48
0,2020-11-04,3.1,10.9,22.1,215.1,84.0,16.210417,45,10,1053,2020-11-04,48
0,2020-11-04,3.1,10.9,22.1,215.1,84.0,16.210417,45,10,1053,2020-11-04,48
1,2020-11-23,3.9,13.7,23.4,255.9,82.0,18.297917,48,10,1053,2020-11-23,24
1,2020-11-23,3.9,13.7,23.4,255.9,82.0,18.297917,48,10,1053,2020-11-23,24
...,...,...,...,...,...,...,...,...,...,...,...,...
6,2020-11-23,3.9,13.7,23.4,255.9,82.0,18.297917,48,20,2025,2020-11-23,16
6,2020-11-23,3.9,13.7,23.4,255.9,82.0,18.297917,48,20,2025,2020-11-23,16
7,2020-12-17,3.9,17.6,25.6,219.0,83.0,18.308333,51,20,2025,2020-12-17,4
7,2020-12-17,3.9,17.6,25.6,219.0,83.0,18.308333,51,20,2025,2020-12-17,4


In [109]:
dfPlagaAll.head()

Unnamed: 0,Filtrado,Lote,Semana,Fecha,Porcentaje,ET,T_MIN,T_MAX,RS_PROM,HM_PROM,TMP_PROM
0,10,1053,45,2020-11-04,,3.1,10.9,22.1,215.1,84.0,16.210417
1,10,1053,48,2020-11-23,,3.9,13.7,23.4,255.9,82.0,18.297917
0,10N,1012,27,2020-07-04,,2.6,11.7,22.1,183.0,88.0,16.235417
1,10N,1012,29,2020-07-17,,1.8,14.8,20.1,131.6,88.0,16.15625
2,10N,1012,43,2020-10-21,,2.5,15.2,22.2,200.2,84.0,17.608889


In [121]:
dfPlagaAll = dfPlagaAll[["Filtrado","Lote","Semana","Fecha","Porcentaje","ET","T_MIN","T_MAX","RS_PROM","HM_PROM","TMP_PROM"]]

In [122]:
dfPlagaAll.to_excel("datasets/Dagbertus2020.xlsx")

In [126]:
dfPlagaAll = dfPlagaAll[dfPlagaAll["Porcentaje"] != "4"]

In [128]:
dfPlagaAll = dfPlagaAll[(dfPlagaAll["Semana"] >= 38) & (dfPlagaAll["Semana"] <= 47)]

In [129]:
dfPlagaAll.shape[0]

260

In [130]:
for filtrado in dfPlagaAll["Filtrado"].unique():
    print(filtrado)

10
17
20
