#### Environment Setup

In [1]:
import pandas as pd
import numpy as np
import imdlib as imd
import os
from glob import iglob
import rasterio

#### Rain Data Path

In [2]:
# Directory holding the yearly rainfall grid files (ind<year>_rfp25.GRD) that
# rain_data_process reads; passed to it as its first argument.
base_path = r'C:\Users\Manan Arora\Desktop\AgriTech Project\Data Driven Business Metrics Project\Data\Weather\Rainfall'

#### Rain Data Conversion Function

In [3]:
def rain_data_process(base_path, year, out_dir=None):
    """Convert one year of daily gridded rainfall into monthly totals per grid cell.

    Reads ``ind<year>_rfp25.GRD`` from ``base_path`` as native float32 values,
    labels every grid cell with its longitude/latitude, sums the daily values of
    each calendar month, writes the table to ``MonthlyTotal_<year>.csv`` and
    prints a completion message.

    Parameters
    ----------
    base_path : str
        Directory that contains the yearly ``.GRD`` file.
    year : str or int
        Four-digit year to process. It also determines how many daily grids
        (365 or 366) are read from the file.
    out_dir : str, optional
        Directory the CSV is written to. Defaults to the project's
        ``Rainfall_MonthlyTotal`` folder that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per grid cell with ``Longitude``, ``Latitude`` and twelve
        ``R_Sum_<Mon><year>`` columns (the same table that is saved as CSV).

    Raises
    ------
    ValueError
        If the file holds fewer values than the year's day count requires.
    """
    if out_dir is None:
        out_dir = r'C:\Users\Manan Arora\Desktop\AgriTech Project\Data Driven Business Metrics Project\Data\Weather\Rainfall_MonthlyTotal'

    # Grid coordinates as per the IMD website: 129 latitudes x 135 longitudes.
    rain_lat = np.arange(6.5, 38.75, 0.25)
    rain_long = np.arange(66.5, 100.25, 0.25)
    n_long, n_lat = len(rain_long), len(rain_lat)

    # One column per calendar day; ISO dates avoid day-first/month-first ambiguity.
    date_columns = pd.date_range('{}-01-01'.format(year),
                                 '{}-12-31'.format(year)).strftime('%Y-%m-%d').tolist()
    n_days = len(date_columns)

    # Read exactly the number of values the calendar year requires and verify
    # that the file really contained them (np.fromfile returns a short array
    # instead of failing when the file is too small).
    fname = os.path.join(base_path, 'ind{}_rfp25.GRD'.format(year))
    n_values = n_days * n_long * n_lat
    raw = np.fromfile(fname, dtype=np.float32, count=n_values)
    if raw.size != n_values:
        raise ValueError(
            '{} holds {} float32 values but {} are needed for {} days'.format(
                fname, raw.size, n_values, n_days))

    # NOTE(review): each day is interpreted as a (longitude, latitude) grid,
    # exactly as the original implementation did. Confirm against the IMD
    # format notes that the file is not stored latitude-major; if it is, the
    # coordinate labels below are transposed.
    data = raw.reshape(n_days, n_long, n_lat)

    # Row labels: longitude is the outer (slow) axis, latitude the inner one,
    # which matches the row-major flattening of each daily grid below.
    long_rows = [str(lon) + 'E' for lon in np.repeat(rain_long, n_lat)]
    lat_rows = [str(lat) + 'N' for lat in np.tile(rain_lat, n_long)]

    # Flatten every day to one vector of cells, widen to float64 so the sums
    # accumulate in double precision, and mark the -999.0 sentinel as missing.
    daily = data.reshape(n_days, n_long * n_lat).astype(np.float64)
    daily[daily == -999.0] = np.nan
    rain_dat = pd.DataFrame(daily.T, columns=date_columns)

    # Monthly totals. pandas skips NaN when summing, so a cell with no valid
    # value in a month gets a total of 0.0.
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    monthly = {'Longitude': long_rows, 'Latitude': lat_rows}
    for month_number, month_name in enumerate(month_names, start=1):
        prefix = '{}-{:02d}'.format(year, month_number)
        month_cols = [col for col in date_columns if col.startswith(prefix)]
        monthly['R_Sum_{}{}'.format(month_name, year)] = rain_dat[month_cols].sum(axis=1)

    final_rain_data = pd.DataFrame(data=monthly)

    # Save as CSV (the default integer index is written as the first column).
    final_rain_data.to_csv(os.path.join(out_dir, 'MonthlyTotal_{}.csv'.format(year)))

    print('Reading and converting rainfall data to monthly total per location for {} is complete'.format(year))
    return final_rain_data

#### Executing the Function

In [4]:
# Process every year from 2000 through 2017; each year is passed as a string.
years = [str(year_number) for year_number in range(2000, 2018)]
for year in years:
    rain_data_process(base_path, year)

Reading and converting rainfall data to monthly total per location for 2000 is complete
Reading and converting rainfall data to monthly total per location for 2001 is complete
Reading and converting rainfall data to monthly total per location for 2002 is complete
Reading and converting rainfall data to monthly total per location for 2003 is complete
Reading and converting rainfall data to monthly total per location for 2004 is complete
Reading and converting rainfall data to monthly total per location for 2005 is complete
Reading and converting rainfall data to monthly total per location for 2006 is complete
Reading and converting rainfall data to monthly total per location for 2007 is complete
Reading and converting rainfall data to monthly total per location for 2008 is complete
Reading and converting rainfall data to monthly total per location for 2009 is complete
Reading and converting rainfall data to monthly total per location for 2010 is complete
Reading and converting rainfall 

#### Temperature Data Path

In [12]:
# Directory holding the yearly average-temperature grid files (<year>.GRD) that
# temp_data_process reads.
# NOTE: this rebinds the same global `base_path` used by the rainfall cells; re-running
# the rainfall loop after this cell would make it look for its files in this folder.
base_path = r'C:\Users\Manan Arora\Desktop\AgriTech Project\Data Driven Business Metrics Project\Data\Weather\Temperature\AverageT'

#### Temperature Data Conversion Function

In [13]:
def temp_data_process(base_path, year, out_dir=None):
    """Convert one year of daily gridded temperature into monthly averages per grid cell.

    Reads ``<year>.GRD`` from ``base_path`` as native float32 values, labels
    every grid cell with its longitude/latitude, averages the daily values of
    each calendar month, writes the table to ``MonthlyAverages_<year>.csv`` and
    prints a completion message.

    Parameters
    ----------
    base_path : str
        Directory that contains the yearly ``.GRD`` file.
    year : str or int
        Four-digit year to process. It also determines how many daily grids
        (365 or 366) are read from the file.
    out_dir : str, optional
        Directory the CSV is written to. Defaults to the project's
        ``Temperature_MonthlyAverages`` folder that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per grid cell with ``Longitude``, ``Latitude`` and twelve
        ``T_Avg_<Mon><year>`` columns (the same table that is saved as CSV).

    Raises
    ------
    ValueError
        If the file holds fewer values than the year's day count requires.
    """
    if out_dir is None:
        out_dir = r'C:\Users\Manan Arora\Desktop\AgriTech Project\Data Driven Business Metrics Project\Data\Weather\Temperature_MonthlyAverages'

    # Grid coordinates as per the IMD website: 31 latitudes x 31 longitudes.
    temp_lat = np.arange(7.5, 38.5, 1)
    temp_long = np.arange(67.5, 98.5, 1)
    n_long, n_lat = len(temp_long), len(temp_lat)

    # One column per calendar day; ISO dates avoid day-first/month-first ambiguity.
    date_columns = pd.date_range('{}-01-01'.format(year),
                                 '{}-12-31'.format(year)).strftime('%Y-%m-%d').tolist()
    n_days = len(date_columns)

    # Read exactly the number of values the calendar year requires and verify
    # that the file really contained them (np.fromfile returns a short array
    # instead of failing when the file is too small).
    fname = os.path.join(base_path, '{}.GRD'.format(year))
    n_values = n_days * n_long * n_lat
    raw = np.fromfile(fname, dtype=np.float32, count=n_values)
    if raw.size != n_values:
        raise ValueError(
            '{} holds {} float32 values but {} are needed for {} days'.format(
                fname, raw.size, n_values, n_days))

    # NOTE(review): each day is interpreted as a (longitude, latitude) grid,
    # exactly as the original implementation did. Confirm against the IMD
    # format notes that the file is not stored latitude-major; if it is, the
    # coordinate labels below are transposed.
    data = raw.reshape(n_days, n_long, n_lat)

    # Row labels: longitude is the outer (slow) axis, latitude the inner one,
    # which matches the row-major flattening of each daily grid below.
    long_rows = [str(lon) + 'E' for lon in np.repeat(temp_long, n_lat)]
    lat_rows = [str(lat) + 'N' for lat in np.tile(temp_lat, n_long)]

    # Flatten every day to one vector of cells and widen to float64. The
    # sentinel literal is the float32 value 99.9 written out in double
    # precision, so the equality test matches after widening.
    daily = data.reshape(n_days, n_long * n_lat).astype(np.float64)
    daily[daily == 99.9000015258789] = np.nan
    temp_dat = pd.DataFrame(daily.T, columns=date_columns)

    # Monthly averages. pandas skips NaN when averaging, so a cell with no
    # valid value in a month gets NaN.
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    monthly = {'Longitude': long_rows, 'Latitude': lat_rows}
    for month_number, month_name in enumerate(month_names, start=1):
        prefix = '{}-{:02d}'.format(year, month_number)
        month_cols = [col for col in date_columns if col.startswith(prefix)]
        monthly['T_Avg_{}{}'.format(month_name, year)] = temp_dat[month_cols].mean(axis=1)

    final_temp_data = pd.DataFrame(data=monthly)

    # Save as CSV (the default integer index is written as the first column).
    final_temp_data.to_csv(os.path.join(out_dir, 'MonthlyAverages_{}.csv'.format(year)))

    print('Reading and converting temperate data to monthly average per location for {} is complete'.format(year))
    return final_temp_data
    

#### Executing the Function

In [14]:
# Process every year from 2000 through 2017; each year is passed as a string.
years = [str(year_number) for year_number in range(2000, 2018)]
for year in years:
    temp_data_process(base_path, year)

Reading and converting temperate data to monthly average per location for 2000 is complete
Reading and converting temperate data to monthly average per location for 2001 is complete
Reading and converting temperate data to monthly average per location for 2002 is complete
Reading and converting temperate data to monthly average per location for 2003 is complete
Reading and converting temperate data to monthly average per location for 2004 is complete
Reading and converting temperate data to monthly average per location for 2005 is complete
Reading and converting temperate data to monthly average per location for 2006 is complete
Reading and converting temperate data to monthly average per location for 2007 is complete
Reading and converting temperate data to monthly average per location for 2008 is complete
Reading and converting temperate data to monthly average per location for 2009 is complete
Reading and converting temperate data to monthly average per location for 2010 is complete