In [1]:
# Dependencies
import json
import requests
#from config import api_key
#from citipy import citipy
from random import random
import pandas as pd
#import matplotlib.pyplot as plt
import numpy as np
import datetime
from sqlalchemy import create_engine

In [2]:
# Load Kaggle rainfall csv into data frame
csv_file = "chennai_reservoir_rainfall.csv"
rainfall_df = pd.read_csv(csv_file)
rainfall_df = rainfall_df[rainfall_df['Date'].str.contains('2019')]
rainfall_df.head()

Unnamed: 0,Date,POONDI,CHOLAVARAM,REDHILLS,CHEMBARAMBAKKAM
5479,01-01-2019,0.0,0.0,0.0,0.0
5480,02-01-2019,0.0,0.0,0.0,0.0
5481,03-01-2019,0.0,0.0,0.0,0.0
5482,04-01-2019,0.0,0.0,0.0,0.0
5483,05-01-2019,0.0,0.0,0.0,0.0


In [3]:
# Rename column names to keep them short
replace_cols = {'Date':'date', 'POONDI':'pdy','CHOLAVARAM':'clv','REDHILLS':'rdh','CHEMBARAMBAKKAM':'cbr'}
rainfall_df = rainfall_df.rename(columns=replace_cols)
rainfall_df.head()

Unnamed: 0,date,pdy,clv,rdh,cbr
5479,01-01-2019,0.0,0.0,0.0,0.0
5480,02-01-2019,0.0,0.0,0.0,0.0
5481,03-01-2019,0.0,0.0,0.0,0.0
5482,04-01-2019,0.0,0.0,0.0,0.0
5483,05-01-2019,0.0,0.0,0.0,0.0


In [4]:
# Create postgres connection
rds_connection_string = "postgres:Password00@localhost:5432/etl"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [5]:
# Check postgres connection
engine.table_names()

['customer_name', 'customer_location', 'rainfall', 'weather', 'monthlyagg']

In [6]:
# Load to rainfall table
rainfall_df.to_sql(name='rainfall', con=engine, if_exists='append', index=False)

In [7]:
#Build ~300 records  dataframe. Use random lat & Lon but assume it is for the same city and for each date starting 2019-01-01

Lat = np.random.uniform(low=-90.000, high=90.000, size=300)
Lng = np.random.uniform(low=-180.000, high=180.000, size=300)

LocationDf = pd.DataFrame( 
     list(zip(Lat, Lng) ), columns = ['Lat','Lng'])
LocationDf['Date'] = ''
LocationDf['Max Temp'] = ''


#LocationDf



In [8]:
# Insert dates starting from Jan 1 2019

starttime = datetime.date(2019,1,1)
for index,row in LocationDf.iterrows():
    #LocationDf.loc[index,'Date'] = (starttime + datetime.timedelta(days=index)).strftime("%#d/%#m/%Y")
    LocationDf.loc[index,'Date'] = (starttime + datetime.timedelta(days=index)).strftime("%d-%m-%Y")
    
#LocationDf.groupby(['City','Country'])['City','Country'].count()

#LocationDf.dropna(subset=['City'], axis='rows', inplace=True)
LocationDf.head()


Unnamed: 0,Lat,Lng,Date,Max Temp
0,66.744436,58.928541,01-01-2019,
1,82.16939,-52.903404,02-01-2019,
2,-26.040905,-164.586494,03-01-2019,
3,34.983758,112.778055,04-01-2019,
4,44.842481,123.147619,05-01-2019,


In [9]:
# Build  query URL to get temperature for each day
baseurl = "http://api.openweathermap.org/data/2.5/weather"
params = {
    'units' : "imperial",
    #'appid' : api_key,
    'appid' : 'c68339f00f348d66865e58c8790c290a',
 #   'city'  : "Chennai"
}



for index,row in LocationDf.iterrows():
    #city = f"{row['City']},{row['Country']}"
    #params['q'] = city
    params['lat'] = row['Lat']
    params['lon'] = row['Lng']
    #print(params)
    response = requests.get(baseurl,params=params).json()
    #print(f"Processing record {i} | {city}")
    print(f"Requesting for {baseurl} with parameters {params}")
    LocationDf.loc[index,'Max Temp'] = response['main']['temp_max']
      
LocationDf.head()



Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 66.74443566205144, 'lon': 58.928541056168996}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 82.1693902367538, 'lon': -52.90340371693226}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -26.04090475034561, 'lon': -164.58649389585918}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 34.98375759700795, 'lon': 112.77805509201994}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 44.84248112999134, 'lon': 123.14761900259248}
Requestin

Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 18.346986793381333, 'lon': 144.11812452620734}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -25.76054797736208, 'lon': 74.51827616372731}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -82.44192782006708, 'lon': -40.027711658996054}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 73.99875067514961, 'lon': 10.615689872598324}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 16.66273438755306, 'lon': -95.13724869386594}
Request

Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 16.75706870227502, 'lon': 79.67070946299367}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -38.93404087492905, 'lon': -23.83408491720047}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -18.751725018540924, 'lon': -10.954962279809052}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 78.2795941554163, 'lon': -151.327621079069}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -61.647977159481854, 'lon': 157.247516328298}
Requestin

Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 44.085798451278805, 'lon': -141.21527091847116}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 42.553708574174635, 'lon': 17.68946754575066}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 89.92452232627252, 'lon': -154.1864470488515}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 76.08140722012257, 'lon': 1.5394902320607287}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 36.80822600147856, 'lon': -42.53364570602227}
Requesti

Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -89.17756538701025, 'lon': -4.674150438287626}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -75.34826728014218, 'lon': 24.00239156815178}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 5.973302998245657, 'lon': 17.820422122476174}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -32.98782130822263, 'lon': 40.39319366337634}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -22.924197950920302, 'lon': 153.1247570313144}
Requesti

Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -48.63195630666858, 'lon': -60.13507624042913}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -27.555728576620595, 'lon': -172.4989186120802}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -35.31312484027191, 'lon': -159.99077657870407}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -18.977354722621385, 'lon': -62.568341429905814}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -32.05058271018182, 'lon': -41.04815072443594}
R

Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -23.51173217242858, 'lon': 71.75040507486253}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -66.69869364880068, 'lon': -131.85428524017806}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -61.97042441696419, 'lon': -28.418221792324857}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -80.40708856895995, 'lon': 101.51725878846179}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 13.795960144312716, 'lon': -60.45528633416015}
Requ

Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 3.1958087827808157, 'lon': -20.78453633168334}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': 78.70068120799525, 'lon': 109.50251816615014}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -31.546962425864095, 'lon': -82.69494938886926}
Requesting for http://api.openweathermap.org/data/2.5/weather with parameters {'units': 'imperial', 'appid': 'c68339f00f348d66865e58c8790c290a', 'lat': -20.486832870057412, 'lon': -60.03411518986985}


Unnamed: 0,Lat,Lng,Date,Max Temp
0,66.744436,58.928541,01-01-2019,42.89
1,82.16939,-52.903404,02-01-2019,30.11
2,-26.040905,-164.586494,03-01-2019,72.59
3,34.983758,112.778055,04-01-2019,83.21
4,44.842481,123.147619,05-01-2019,63.41


In [10]:
# Extract only date and temperature
LocationDf = LocationDf[['Date' , 'Max Temp']]
LocationDf.head()



Unnamed: 0,Date,Max Temp
0,01-01-2019,42.89
1,02-01-2019,30.11
2,03-01-2019,72.59
3,04-01-2019,83.21
4,05-01-2019,63.41


In [11]:
#Rename column
LocationDf = LocationDf.rename(columns={'Date':'date','Max Temp': 'temp'})
LocationDf.head()

Unnamed: 0,date,temp
0,01-01-2019,42.89
1,02-01-2019,30.11
2,03-01-2019,72.59
3,04-01-2019,83.21
4,05-01-2019,63.41


In [12]:
# Load into weahter table
LocationDf.to_sql(name='weather', con=engine, if_exists='append', index=False)

In [13]:
# Combine the tables, Extract month component and save as separate column
MergedDf = LocationDf.merge(rainfall_df,on='date')

In [14]:
MergedDf['month'] = MergedDf['date'].str[3:]
MergedDf['month']

0      01-2019
1      01-2019
2      01-2019
3      01-2019
4      01-2019
5      01-2019
6      01-2019
7      01-2019
8      01-2019
9      01-2019
10     01-2019
11     01-2019
12     01-2019
13     01-2019
14     01-2019
15     01-2019
16     01-2019
17     01-2019
18     01-2019
19     01-2019
20     01-2019
21     01-2019
22     01-2019
23     01-2019
24     01-2019
25     01-2019
26     01-2019
27     01-2019
28     01-2019
29     01-2019
        ...   
138    05-2019
139    05-2019
140    05-2019
141    05-2019
142    05-2019
143    05-2019
144    05-2019
145    05-2019
146    05-2019
147    05-2019
148    05-2019
149    05-2019
150    05-2019
151    06-2019
152    06-2019
153    06-2019
154    06-2019
155    06-2019
156    06-2019
157    06-2019
158    06-2019
159    06-2019
160    06-2019
161    06-2019
162    06-2019
163    06-2019
164    06-2019
165    06-2019
166    06-2019
167    06-2019
Name: month, Length: 168, dtype: object

In [15]:
# create total or rainfall columns and average for temperature and save as data frame
rdhsum = MergedDf.groupby(MergedDf['month'])['rdh'].sum()
pdysum = MergedDf.groupby(MergedDf['month'])['pdy'].sum()
clvsum = MergedDf.groupby(MergedDf['month'])['clv'].sum()
cbrsum = MergedDf.groupby(MergedDf['month'])['cbr'].sum()
tempavg = MergedDf.groupby(MergedDf['month'])['temp'].sum()/MergedDf.groupby(MergedDf['month'])['temp'].count()

FinalDf = pd.DataFrame({ 'rdhsum' : rdhsum, 'pdysum' : pdysum , 'clvsum':clvsum,'cbrsum':cbrsum,'tempavg':tempavg})
FinalDf.reset_index(inplace=True)
FinalDf



Unnamed: 0,month,rdhsum,pdysum,clvsum,cbrsum,tempavg
0,01-2019,0.0,0.0,0.0,0.0,47.54
1,02-2019,0.0,4.0,0.0,0.0,46.862143
2,03-2019,0.0,0.0,0.0,0.0,55.623226
3,04-2019,1.0,6.0,3.0,0.0,52.285667
4,05-2019,0.0,27.0,0.0,0.0,40.751613
5,06-2019,0.0,0.0,0.0,0.0,61.598824


In [16]:
# Save data frame into monthlyagg table
FinalDf.to_sql(name='monthlyagg', con=engine, if_exists='append', index=False)

In [17]:
#alternate method
outputDf = pd.read_sql(con=engine, sql='select substring(r.date, 4,7) as month, sum(rdh), sum(pdy), sum(clv), sum(cbr),avg(temp) from rainfall r join weather w on r.date = w.date group by month order by month')
outputDf

Unnamed: 0,month,sum,sum.1,sum.2,sum.3,avg
0,01-2019,0.0,0.0,0.0,0.0,47.54
1,02-2019,0.0,4.0,0.0,0.0,46.862143
2,03-2019,0.0,0.0,0.0,0.0,55.623226
3,04-2019,1.0,6.0,3.0,0.0,52.285667
4,05-2019,0.0,27.0,0.0,0.0,40.751613
5,06-2019,0.0,0.0,0.0,0.0,61.598824
