In [1]:
# Tratamiento de datos
# ==============================================================================
import pandas as pd
import numpy as np

# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns

# Preprocesado y modelado
# ==============================================================================
from scipy.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm
from scipy import stats

# Matplotlib configuration
# ==============================================================================
plt.rcParams['image.cmap'] = "bwr"
#plt.rcParams['figure.dpi'] = "100"
plt.rcParams['savefig.bbox'] = "tight"
# Apply the ggplot style exactly once. The previous
# `style.use(...) or plt.style.use(...)` ran the same call twice, because
# `use` returns None and `or` therefore always evaluated its right-hand side.
plt.style.use('ggplot')

# Warnings configuration
# ==============================================================================
import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including pandas deprecation and SettingWithCopy warnings.
warnings.filterwarnings('ignore')

In [2]:
 # Conexión a BD
# ==============================================================================
import pyodbc

In [3]:
# Database configuration and connection
# ==============================================================================
# Connection settings are read from environment variables so that credentials
# are not stored in the notebook. The non-secret settings fall back to the
# values previously hard-coded here; the password has no default.
import os
from getpass import getpass

direccion_servidor = os.environ.get('VIRUNET_DB_SERVER', '192.168.1.32,55078')
nombre_bd = os.environ.get('VIRUNET_DB_NAME', 'BDVirunet')
nombre_usuario = os.environ.get('VIRUNET_DB_USER', 'sa')
# Ask interactively when VIRUNET_DB_PASSWORD is not set (or is empty).
password = os.environ.get('VIRUNET_DB_PASSWORD') or getpass('Password SQL Server: ')

try:
    conexion = pyodbc.connect(
        'DRIVER={ODBC Driver 17 for SQL Server};SERVER=' + direccion_servidor
        + ';DATABASE=' + nombre_bd
        + ';UID=' + nombre_usuario
        + ';PWD=' + password
    )
    print("Conexión exitosa")
except Exception as e:
    print("Ocurrió un error al conectar a SQL Server: ", e)
    # Re-raise: without a connection `conexion` is undefined and every later
    # cell would otherwise fail with an unrelated NameError.
    raise

Conexión exitosa


In [4]:
# Start and end dates (ISO 'YYYY-MM-DD' strings) of the period under study.
# They are passed to the stored procedure and used to filter the weather data.
dFechaInicio, dFechaFin = '2019-01-01', '2019-12-31'

In [None]:
# Run the stored procedure STP_OBTENER_TOTALIZADO_PLAGAS for the selected date
# range and load its result set into `df`.

# Column names assigned to the result set, in positional order.
COLUMNAS_PLAGAS = ['Evaluador', 'Fecha', 'Fundo', 'Filtrado', 'Turno', 'Lote', 'Grupo_Plaga',
                   'Plaga', 'Caracteristica', 'Suma', 'Grado', 'Promedio', 'Porcentaje']

try:
    with conexion.cursor() as cursor:
        consulta = "exec STP_OBTENER_TOTALIZADO_PLAGAS ?,?,?,?"
        cursor.execute(consulta, (dFechaInicio, dFechaFin, 1, 0))
        rows = cursor.fetchall()
        # Column names as reported by the driver; used below only to check
        # that the result set has the expected width.
        cols = [columna[0] for columna in cursor.description]
except Exception as e:
    print(e)
    # Re-raise so a failed query stops the notebook here instead of leaving
    # `df` undefined for the following cells.
    raise

if len(cols) != len(COLUMNAS_PLAGAS):
    raise ValueError(
        f"Expected {len(COLUMNAS_PLAGAS)} columns from the stored procedure, got {len(cols)}: {cols}"
    )

# Build the frame in a single call instead of appending one row at a time with
# df.loc[i]; each driver row is converted to a plain tuple first.
df = pd.DataFrame([tuple(fila) for fila in rows], columns=COLUMNAS_PLAGAS)

In [None]:
# Keep only the rows whose "Plaga" value is the Dagbertus entry. An explicit
# copy is taken because a later cell adds a 'Semana' column to dfPlaga, and
# assigning a column to a filtered slice of `df` is ambiguous in pandas.
dfPlaga = df[df["Plaga"] == "DAGBERTUS MINENSIS, DAGBERTUS PERUANUS"].copy()

In [None]:
# Number of distinct "Lote" values among the selected rows.
len(dfPlaga["Lote"].unique())

In [None]:
# Rows whose "Filtrado" value is the string "17". Copied explicitly because the
# next cell adds a 'Semana' column to df17.
df17 = dfPlaga[dfPlaga["Filtrado"] == "17"].copy()

In [None]:
# ISO week number of each evaluation date. `Series.dt.weekofyear` was deprecated
# and later removed from pandas; `dt.isocalendar().week` yields the same week
# numbers (as a nullable UInt32 column).
df17['Semana'] = pd.to_datetime(df17['Fecha'], format='%Y-%m-%d').dt.isocalendar().week

In [None]:
# Print, for every distinct lot in df17, its identifier and its number of rows.
for lote in pd.unique(df17["Lote"]):
    filas_lote = df17.loc[df17["Lote"] == lote]
    print(f"Lote {lote}")
    print(len(filas_lote))
    print("********************")

In [None]:
# Rows of lot "1717" within df17, ordered by week number.
df17.loc[df17["Lote"] == "1717"].sort_values(by='Semana')

In [None]:
# Preview the first five rows of the selected pest data.
dfPlaga.head(n=5)

In [None]:
# Load sheet 'TABLA' of the weather workbook into df_weather.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# relative to the project or configurable.
RUTA_DATOS_CLIMA = 'D:/Proyectos GitHub/Python/Datos/Datos Climaticos Viru Palto.xlsx'

# The context manager closes the workbook even when parsing fails, which the
# previous explicit close() call did not guarantee.
with pd.ExcelFile(RUTA_DATOS_CLIMA) as data_weather:
    df_weather = data_weather.parse('TABLA')

In [None]:
# Keep the weather rows of year 2019 whose date lies between dFechaInicio and
# dFechaFin (both bounds included), then renumber the index from zero.
es_2019 = df_weather['Año'] == 2019
en_rango = df_weather['Fecha'].between(dFechaInicio, dFechaFin)
df_weather = df_weather.loc[es_2019 & en_rango].reset_index(drop=True)

In [None]:
# Keep the date plus the weather variables of interest and give the variables
# short names. The mapping order is also the resulting column order.
renombres_clima = {
    'ET(mm).': 'ET',
    'Tº MIN/DIA (°C)': 'T_MIN',
    'Tº MAX/DIA (°C)': 'T_MAX',
    'Rad. Solar Prom.': 'RS_PROM',
    'HUMEDAD PROM.': 'HM_PROM',
    'T° Prom/Día (°C)': 'TMP_PROM',
    'Semana': 'Semana',
}
columnas_clima = ['Fecha'] + list(renombres_clima)
df_weather = df_weather.loc[:, columnas_clima].rename(columns=renombres_clima)

In [None]:
# ISO week number of each evaluation date. `Series.dt.weekofyear` was deprecated
# and later removed from pandas; `dt.isocalendar().week` yields the same week
# numbers (as a nullable UInt32 column).
dfPlaga['Semana'] = pd.to_datetime(dfPlaga['Fecha'], format='%Y-%m-%d').dt.isocalendar().week

In [None]:
# Number of rows in the selected pest data.
len(dfPlaga)

In [None]:
# Columns needed to pair each evaluation with the weather data.
columnas_plaga = ["Filtrado", "Lote", "Fecha", "Semana", "Porcentaje"]
dfFilterPlaga = dfPlaga.loc[:, columnas_plaga]

In [None]:
# Distinct "Filtrado" values, in order of first appearance.
pd.unique(dfFilterPlaga["Filtrado"])

In [None]:
# Empty accumulator that fixes the column order of the combined
# weather + plague table.
dfPlagaAll = pd.DataFrame(
    data=None,
    columns=[
        'FechaClima', 'ET', 'T_MIN', 'T_MAX', 'RS_PROM', 'HM_PROM', 'TMP_PROM', 'Semana',
        'Filtrado', 'Lote', 'Fecha', 'Porcentaje',
    ],
)

In [None]:
# For every (Filtrado, Lote) group, pair the plague evaluations with the weather
# records of the same dates and append the result to dfPlagaAll.
partes_union = []
for filtrado in dfFilterPlaga["Filtrado"].unique():
    dfFiltrado = dfFilterPlaga[dfFilterPlaga["Filtrado"] == filtrado]
    arrLote = dfFiltrado["Lote"].unique()
    for lote in arrLote:
        # Order the plague rows chronologically so they line up with the weather
        # rows, which are sorted by date below. Sorting by 'Semana' is not
        # chronological: 2019-12-30 and 2019-12-31 belong to ISO week 1, so they
        # were placed first and shifted every row of the positional join.
        dfPlagaFiltrado = (
            dfFiltrado[dfFiltrado["Lote"] == lote]
            .sort_values(by='Fecha', key=lambda s: pd.to_datetime(s, format='%Y-%m-%d'))
            .reset_index(drop=True)
            # 'Semana' is dropped to avoid a name clash with the weather table;
            # it is recomputed after the join.
            .drop('Semana', axis=1)
        )

        fl_df_weather = (
            df_weather[df_weather['Fecha'].isin(dfPlagaFiltrado['Fecha'])]
            .sort_values(by=['Fecha'])
            .reset_index(drop=True)
            .rename(columns={'Fecha': 'FechaClima'})
        )

        # NOTE(review): this join is positional (row i with row i). It only pairs
        # matching dates when the group has exactly one evaluation per date;
        # compare 'FechaClima' with 'Fecha' in the output, or merge on the date.
        df_union = fl_df_weather.join(dfPlagaFiltrado)
        # `.dt.weekofyear` was removed from pandas; isocalendar().week is the
        # replacement and gives the same week numbers.
        df_union['Semana'] = pd.to_datetime(df_union['Fecha'], format='%Y-%m-%d').dt.isocalendar().week
        partes_union.append(df_union)

# Concatenate once instead of growing dfPlagaAll inside the loop, which copied
# the accumulated frame on every iteration.
dfPlagaAll = pd.concat([dfPlagaAll, *partes_union])
       

In [None]:
# Remove fully repeated rows, keeping the first occurrence of each.
dfPlagaAll = dfPlagaAll.drop_duplicates(keep='first')

In [None]:
# Display the combined table ordered by Filtrado, Lote and week.
# The sorted result is only shown; dfPlagaAll itself is left unchanged.
orden_columnas = ['Filtrado', 'Lote', 'Semana']
dfPlagaAll.sort_values(by=orden_columnas)

In [None]:
# Put the identifying columns first and keep every other column after them.
# The original selection listed an empty column name (""), which always raised
# a KeyError.
# NOTE(review): the intended final column list is not recoverable from the
# notebook; confirm which columns the exported dataset should contain.
columnas_clave = ["Filtrado", "Lote", "Fecha"]
columnas_resto = [col for col in dfPlagaAll.columns if col not in columnas_clave]
dfPlagaAll = dfPlagaAll[columnas_clave + columnas_resto]

In [None]:
# Write the combined table to an Excel workbook (path relative to the working directory).
ruta_salida = "datasets/Dagbertus2019.xlsx"
dfPlagaAll.to_excel(ruta_salida)