In [117]:
import pandas as pd
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows', 100)

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


In [93]:
# Sensor site names; partDataByLoc matches these against the "location"
# column of the sensor data.
sensorlocations = [
    "Jharoda_Kalan",
    "DTC_bus_terminal",
    "Nangli_Dairy",
    "ShaheenBagh",
    "Sanjay_Colony_2",
    "Tekhand2",
]
# CPCB station names; these are the keys combineCPCB uses for its result dict.
cpcbLocations = [
    "najafgarh",
    "okhla",
]
# Every location key, sensors first and CPCB stations last.
allLocations = [*sensorlocations, *cpcbLocations]
# Measurement names (not referenced in the visible cells; presumably sensor
# data column names -- confirm against db.csv).
sensorParameters = [
    "pm1_0",
    "pm2_5",
    "pm10",
    "temp",
    "humid",
]

def getRawData():
    """Download the raw sensor and CPCB CSV files from GitHub.

    Each file is fetched over the network with ``pd.read_csv``, so this is
    the slow step of the notebook.

    Returns:
        tuple: ``(sensorData, okla_pollutants, okla_meter, naja_pollutants,
        naja_meter)`` -- one DataFrame per CSV, in that order.
    """
    baseUrl = "https://raw.githubusercontent.com/sudhirattri/aq-data-scraper/main/eziodata/combined/"

    # densityStats.csv used to be downloaded here as well, but the frame was
    # never used or returned, so that request has been dropped.
    sensorData = pd.read_csv(baseUrl + "db.csv")

    # CPCB files are addressed by name rather than by position in a list,
    # so the station/file pairing is visible at the call site.
    okla_pollutants = pd.read_csv(baseUrl + "okla_pollutants.csv")
    okla_meter = pd.read_csv(baseUrl + "okla_meter.csv")
    naja_pollutants = pd.read_csv(baseUrl + "naja_pollutants.csv")
    naja_meter = pd.read_csv(baseUrl + "naja_meter.csv")

    return sensorData,okla_pollutants,okla_meter,naja_pollutants,naja_meter

def combineCPCB(okla_pollutants,okla_meter,naja_pollutants,naja_meter):
    """Join each CPCB station's pollutant and meter frames into one frame.

    For each station the two frames are outer-merged on the 'To Date'
    column, then 'To Date', 'PM2.5' and 'PM10' are renamed to 'timestamp',
    'pm2_5' and 'pm10'.

    Returns:
        dict: ``{"najafgarh": DataFrame, "okhla": DataFrame}``.
    """
    columnNames = {'To Date': 'timestamp', 'PM2.5': 'pm2_5','PM10': 'pm10'}
    framesByStation = {
        "najafgarh": (naja_pollutants, naja_meter),
        "okhla": (okla_pollutants, okla_meter),
    }
    return {
        station: pd.merge(pollutants, meter, on='To Date', how='outer').rename(columns=columnNames)
        for station, (pollutants, meter) in framesByStation.items()
    }
    
def partDataByLoc(sensorData,cpcbData):
    """Build a dict of DataFrames keyed by location name.

    Each name in ``sensorlocations`` maps to a copy of the ``sensorData``
    rows whose "location" column equals that name. The "najafgarh" and
    "okhla" entries are the frames from ``cpcbData`` themselves (not copies).
    """
    byLocation = {
        name: sensorData[sensorData["location"]==name].copy()
        for name in sensorlocations
    }
    for station in ("najafgarh", "okhla"):
        byLocation[station] = cpcbData[station]
    return byLocation

def convertTimeStamp(allData):
    """Parse every frame's 'timestamp' column and sort the frame by it.

    ``allData`` is modified in place: each frame gets its 'timestamp' column
    replaced by parsed datetimes, and each dict entry is then replaced by a
    copy of the frame sorted in ascending timestamp order.

    Args:
        allData: dict mapping location name to a DataFrame that has a
            'timestamp' column.

    Returns:
        The same ``allData`` dict that was passed in.
    """
    # The unused local ``format = "%m/%d/%Y"`` was removed: it shadowed the
    # builtin and was never passed to the parser.
    for location in allData:
        frame = allData[location]
        # Each cell is parsed on its own from its text form, so the format is
        # inferred per value and rows may use different layouts.
        # NOTE(review): pandas reads ambiguous dates month-first by default;
        # confirm that matches the CPCB 'To Date' layout.
        frame['timestamp'] = frame["timestamp"].apply(lambda d: pd.to_datetime(str(d)))
        allData[location] = frame.sort_values(by='timestamp',ascending=True)
    return allData


### Get Raw Data (download step, run once only)

In [3]:
# Fetch all CSVs over the network; the five frames are reused by the cells below.
sensorData,okla_pollutants,okla_meter,naja_pollutants,naja_meter = getRawData()

### Processing (everything below is OK to re-run; no downloads)

In [94]:
# Merge the pollutant and meter frames of each CPCB station into one frame per station.
cpcbData = combineCPCB(okla_pollutants,okla_meter,naja_pollutants,naja_meter)
# Build a dict of DataFrames keyed by location name (sensor sites plus the two CPCB stations).
allData = partDataByLoc(sensorData,cpcbData)
# Parse each frame's 'timestamp' column to datetimes and sort rows chronologically.
allData = convertTimeStamp(allData)

In [144]:
def getIntForTimeDate(row):
    """Return the time of day of ``row["timestamp"]`` as a fraction of 24 hours.

    Despite the name, the result is a float: 0.0 at midnight, 0.5 at noon.
    Only the hour and minute are used; seconds are ignored.
    """
    stamp = row["timestamp"]
    hoursIntoDay = stamp.minute / 60.0 + stamp.hour
    return hoursIntoDay / 24.0
def getFractionalDayForTimeDate(row):
    """Return ``row["timestamp"]`` as a fractional day count from 1 October.

    The integer part is the day number counted from 1 October of the
    timestamp's own year (1 Oct -> 1, 31 Oct -> 31, 1 Nov -> 32). The
    fractional part is the time of day (hour and minute only) as a fraction
    of 24 hours.

    Previously every month other than October was treated as November
    (``31 + day``), so December dates collided with November ones. Results
    for October and November are unchanged.

    Returns:
        float: the fractional day, or NaN when the timestamp is NaT.
    """
    stamp = row["timestamp"]
    timeFromStart = stamp.hour + (stamp.minute/60.0)
    fractionDay = (timeFromStart) / 24.0
    if pd.isna(stamp):
        # NaT has no calendar date; hour/minute are NaN, so this is NaN too,
        # which is what the arithmetic produced before.
        return fractionDay
    # Whole days elapsed since 1 October of the same year, plus one so that
    # 1 October is day 1. toordinal() ignores the time of day.
    dayNumber = stamp.toordinal() - stamp.replace(month=10, day=1).toordinal() + 1
    return dayNumber + fractionDay
    
# Give every location frame a fresh 0..n-1 index and two derived time columns:
# "integerTime" (time of day as a fraction of 24 h) and "fractionalDay".
for locs in allLocations:
    frame = allData[locs]  # same object as the dict entry, so edits land in allData
    frame.index = np.arange(len(frame))
    frame.loc[:,"integerTime"] = frame.apply(getIntForTimeDate, axis=1)
    frame.loc[:,"fractionalDay"] = frame.apply(getFractionalDayForTimeDate, axis=1)

In [None]:
# One stacked subplot per sensor location: PM2.5 against fractional day.
fig, ax = plt.subplots(len(sensorlocations),1,figsize=(10,10*len(sensorlocations)))
for i in range(len(sensorlocations)):
    x = allData[sensorlocations[i]]["fractionalDay"]
    # Tick once per whole day instead of once per sample; a tick at every
    # data point produced thousands of overlapping labels.
    if x.notna().any():
        dayTicks = np.arange(np.floor(x.min()), np.ceil(x.max()) + 1)
    else:
        dayTicks = np.array([])
    # fractionalDay is the day of month in October and 31 + day of month in
    # November, so the November day is d - 31 (the earlier d - 32 labelled
    # 1 Nov as "0 Nov").
    x_ticks = [str(int(d))+' Oct' if d < 32.0 else str(int(d)-31)+' Nov' for d in dayTicks]
    ax[i].set_xticks(dayTicks)
    ax[i].set_xticklabels(x_ticks, rotation=90)
    ax[i].set_title(sensorlocations[i])
    y = allData[sensorlocations[i]]["pm2_5"]
    ax[i].plot(x,y,label="PM 2.5")