In [5]:
import os
import pandas as pd
import numpy as np
import osmnx as ox
from datetime import datetime 
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from geopy.distance import distance
from shapely.geometry import Point
from shapely.geometry import LineString
import math
from pyproj import Geod
import geopandas as gpd
from tqdm import tqdm

### Finding mobile data points which are less than 2 km away from najafgarh station

In [6]:
stations = pd.read_excel(r"E:\Thesis Codes\POI Data\stations_POI.xlsx")
stations.head()

Unnamed: 0,id,area_education,area_entertainment,area_fuel,area_green,area_waste,count_education,count_entertainment,count_fuel,StationName,Latitude,Longitude
0,189,75136,509,0,69912,0,3,9,3,"Najafgarh, Delhi DPCC",28.607574,76.978486
1,235,0,0,0,0,0,0,0,0,"Mundka, Delhi DPCC",28.684722,77.020662
2,239,373057,485,0,341411,0,2,3,0,"NSIT Dwarka, Delhi CPCB",28.61036,77.035145
3,255,27243,0,0,232597,0,2,0,0,Bawana Delhi DPCC,28.794785,77.04699
4,291,53198,0,39,907554,0,5,0,3,"DwarkaSector 8, Delhi DPCC",28.571359,77.071282


In [7]:
lat_njf, long_njf = stations.loc[20]['Latitude'], stations.loc[20]['Longitude']

In [8]:
lat_njf, long_njf

(28.5899126472458, 77.2205670963329)

In [9]:
realtime = pd.read_csv(r"realtime_data_DEC.csv")
realtime2 = pd.read_csv(r"realtime_data_JAN.csv")
realtime.append(realtime2, ignore_index=True)
realtime.drop('Unnamed: 0', axis = 1, inplace = True)

In [10]:
realtime = realtime[realtime.lat != 0]
realtime.reset_index(drop = True, inplace=True)

In [11]:
import math

def haversine(lat1, lon1, lat2, lon2):
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(math.radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
    c = 2 * math.asin(math.sqrt(a))

    # Radius of earth in kilometers. Use 3956 for miles
    r = 6371

    # calculate the result
    distance = c * r
    return distance


In [12]:
def find_distance(df, lat, long):
    d = []
    for i in range(len(df)):
        point1 = (df.loc[i,'long'], df.loc[i,'lat'])
        point2 = (long, lat)
        dist = haversine(lat_njf,long_njf,realtime.loc[i,'lat'],realtime.loc[i,'long'])
        d.append(dist)
    df['dist'] = d
    return df

In [13]:
realtime = find_distance(realtime, lat_njf, long_njf)

In [14]:
realtime = realtime[realtime.dist <= 2]
realtime.reset_index(drop=True, inplace=True)

### Joining statc PM 2.5 with the PM 2.5 values obtained from dynamic data points

In [15]:
df_njf = pd.read_excel(r"C:\Users\vinee\Downloads\Thesis\PM_lodhi_road.xlsx")

In [16]:
realtime.head()

Unnamed: 0,lat,long,last_updated,PM2.5,dist
0,28.579305,77.233188,2022-12-01 00:00:00,103,1.705838
1,28.579325,77.228098,2022-12-01 00:00:00,269,1.388066
2,28.579571,77.22569,2022-12-01 00:00:00,143,1.25402
3,28.579235,77.23197,2022-12-01 00:00:00,128,1.627678
4,28.579305,77.233188,2022-12-01 00:15:00,103,1.705838


In [17]:
realtime.rename(columns = {'last_updated':'From Date'}, inplace = True)

In [18]:
df_njf

Unnamed: 0,From Date,PM2.5
0,01-12-2022 00:00,176.04
1,01-12-2022 00:15,177.26
2,01-12-2022 00:30,180.85
3,01-12-2022 00:45,198.68
4,01-12-2022 01:00,213.42
...,...,...
5947,31-01-2023 22:45,45.66
5948,31-01-2023 23:00,46.76
5949,31-01-2023 23:15,47.6
5950,31-01-2023 23:30,50.5


In [19]:
df_njf['From Date'] = df_njf['From Date'].apply(lambda x : datetime.strptime(x, '%d-%m-%Y %H:%M'))
realtime['From Date'] = realtime['From Date'].apply(lambda x : datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))

In [20]:
df = pd.merge(df_njf, realtime, on = 'From Date', how = 'inner')
df.head()

Unnamed: 0,From Date,PM2.5_x,lat,long,PM2.5_y,dist
0,2022-12-01 00:00:00,176.04,28.579305,77.233188,103,1.705838
1,2022-12-01 00:00:00,176.04,28.579325,77.228098,269,1.388066
2,2022-12-01 00:00:00,176.04,28.579571,77.22569,143,1.25402
3,2022-12-01 00:00:00,176.04,28.579235,77.23197,128,1.627678
4,2022-12-01 00:15:00,177.26,28.579305,77.233188,103,1.705838


In [21]:
df.rename(columns={'PM2.5_x' : 'PM2.5 static', 'PM2.5_y' : 'PM2.5 mobile'}, inplace=True)

In [22]:
def find_midpoint(lat1, lng1, lat2, lng2):
    lat_mid = (lat1 + lat2) / 2
    long_mid = (lng1 + lng2) / 2
    return lat_mid, long_mid

### Checking mid point formula is giving correct results or not

In [19]:
lat_mid, long_mid = find_midpoint(lat_njf, long_njf, df.loc[2, 'lat'], df.loc[2,'long'])
print(lat_mid, long_mid)

28.5847418236229 77.22312854816644


In [20]:
import folium
m = folium.Map(location=[lat_njf, long_njf], zoom_start=13)
folium.Marker(location=[lat_njf, long_njf], popup='Point 1').add_to(m)
folium.Marker(location=[df.loc[2, 'lat'], df.loc[2,'long']], popup='Point 2').add_to(m)
folium.Marker(location=[lat_mid, long_mid], popup='Mid point 1').add_to(m)
m

### Code to add POIs(count), LandUse(area), water, buildings_area in our data

In [21]:
# Code to filter dataframe on basis of the buffer formed by joining the line from the static monitor to the location of data point generated by mobile monitors
def buffer_point(df, lat, long, buffer_size):
    df['points'] = df.apply(lambda x: [y for y in x['geometry'].coords], axis=1)
    count = 0
    for i in range(len(df)):
        point1 = (df.loc[i,'points'][0][0], df.loc[i,'points'][0][1])
        point2 = (long, lat)
        d = haversine(lat_njf, long_njf , df.loc[i,'points'][0][1] , df.loc[i,'points'][0][0])
        if d <= buffer_size:
            count += 1
    return count

# Code to filter dataframe on basis of the buffer formed by joining the line from the static monitor to the location of data point generated by mobile monitors
def buffer_polygon(df, point, buffer_size):
    df = df.to_crs({'init': 'epsg:3857'})
    point = gpd.GeoSeries(point, crs='epsg:4326').to_crs(epsg=3857).iloc[0]
    buffer = point.buffer(buffer_size)
    df = df[df.intersects(buffer)]
    if df.shape[0] == 0:
        return 0
    else:
        df['area_sq_m'] = df['geometry'].area
        return df['area_sq_m'].sum()/(10**6)


In [23]:
def add_POI(df):
    poi = []
    for i in tqdm(range(len(df))):
        poi_gdf = gpd.read_file(r"C:\Users\vinee\Downloads\Thesis\SHP Files\poi_count.shp")
        lat_mid, long_mid = find_midpoint(lat_njf, long_njf, df.loc[i, 'lat'], df.loc[i,'long'])
        buffer_size = df.dist[i]/2
        poi.append(buffer_point(poi_gdf, lat_mid, long_mid, buffer_size))
    df['POI'] = poi

In [34]:
add_POI(df)

ValueError: Metadata inference failed in `min`.

You have supplied a custom function and Dask is unable to 
determine the type of output that that function returns. 

To resolve this please provide a meta= keyword.
The docstring of the Dask function you ran should have more information.

Original error is below:
------------------------
TypeError('cannot perform min with type geometry')

Traceback:
---------
  File "C:\Users\vinee\anaconda3\envs\Thesis\lib\site-packages\dask\dataframe\utils.py", line 193, in raise_on_meta_error
    yield
  File "C:\Users\vinee\anaconda3\envs\Thesis\lib\site-packages\dask\dataframe\core.py", line 6797, in _emulate
    return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
  File "C:\Users\vinee\anaconda3\envs\Thesis\lib\site-packages\dask\utils.py", line 1105, in __call__
    return getattr(__obj, self.method)(*args, **kwargs)
  File "C:\Users\vinee\anaconda3\envs\Thesis\lib\site-packages\pandas\core\generic.py", line 11965, in min
    return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs)
  File "C:\Users\vinee\anaconda3\envs\Thesis\lib\site-packages\pandas\core\generic.py", line 11365, in min
    return self._stat_function(
  File "C:\Users\vinee\anaconda3\envs\Thesis\lib\site-packages\pandas\core\generic.py", line 11353, in _stat_function
    return self._reduce(
  File "C:\Users\vinee\anaconda3\envs\Thesis\lib\site-packages\pandas\core\series.py", line 4797, in _reduce
    return delegate._reduce(name, skipna=skipna, **kwds)
  File "C:\Users\vinee\anaconda3\envs\Thesis\lib\site-packages\geopandas\array.py", line 1331, in _reduce
    raise TypeError(


In [32]:
df

Unnamed: 0,From Date,PM2.5 static,lat,long,PM2.5 mobile,dist
0,2022-12-01 00:00:00,176.04,28.579305,77.233188,103,1.705838
1,2022-12-01 00:00:00,176.04,28.579325,77.228098,269,1.388066
2,2022-12-01 00:00:00,176.04,28.579571,77.225690,143,1.254020
3,2022-12-01 00:00:00,176.04,28.579235,77.231970,128,1.627678
4,2022-12-01 00:15:00,177.26,28.579305,77.233188,103,1.705838
...,...,...,...,...,...,...
7672,2022-12-31 23:45:00,122.35,28.579468,77.234193,184,1.766051
7673,2022-12-31 23:45:00,122.35,28.579571,77.225690,143,1.254020
7674,2022-12-31 23:45:00,122.35,28.579400,77.234454,116,1.790257
7675,2022-12-31 23:45:00,122.35,28.579183,77.232886,177,1.694185


In [26]:
# Code to convert a multipolygon in water shape file to polygon
def process(gdf):
    # Get the index of the row containing the MultiPolygon geometry
    idx = gdf.index[gdf.geometry.type == 'MultiPolygon']
    for i in idx:
        gdf = gdf.explode(index=[i]).drop(i)
        gdf = gdf.reset_index(drop=True)
    return gdf

def add_landuse(df):
    area = []
    for i in tqdm(range(len(df))):
        landuse_gdf = gpd.read_file(r"C:\Users\vinee\Downloads\Thesis\SHP Files\LandUse.shp")
        landuse_gdf = process(landuse_gdf)
        lat_mid, long_mid = find_midpoint(lat_njf, long_njf, df.loc[i, 'lat'], df.loc[i,'long'])
        buffer_size = df.dist[i]/2
        buffer_size = buffer_size*(10**3) #Converting buffer size in metres
        point = Point(long_mid,lat_mid)
        area.append(buffer_polygon(landuse_gdf, point, buffer_size))
    df['landuse(sq_km)'] = area

In [27]:
add_landuse(df)

100%|████████████████████████████████████████████████████████████████████████████| 7677/7677 [4:40:23<00:00,  2.19s/it]


In [28]:
def add_water(df):
    area = []
    for i in tqdm(range(len(df))):
        landuse_gdf = gpd.read_file(r"C:\Users\vinee\Downloads\Thesis\SHP Files\water.shp")
        gdf = process(landuse_gdf)
        lat_mid, long_mid = find_midpoint(lat_njf, long_njf, df.loc[i, 'lat'], df.loc[i,'long'])
        buffer_size = df.dist[i]/2
        buffer_size = buffer_size*(10**3) #Converting buffer size in metres
        point = Point(long_mid,lat_mid)
        try:
            area.append(buffer_polygon(gdf, point, buffer_size))
        except:
            area.append(0)
    df['water(sq_km)'] = area

In [29]:
add_water(df)

100%|██████████████████████████████████████████████████████████████████████████████| 7677/7677 [13:56<00:00,  9.18it/s]


In [30]:
def add_buildings(df):
    area = []
    for i in tqdm(range(len(df))):
        landuse_gdf = gpd.read_file(r"C:\Users\vinee\Downloads\Thesis\SHP Files\buildings_area.shp")
        lat_mid, long_mid = find_midpoint(lat_njf, long_njf, df.loc[i, 'lat'], df.loc[i,'long'])
        buffer_size = df.dist[i]/2
        buffer_size = buffer_size*(10**3) #Converting buffer size in metres
        point = Point(long_mid,lat_mid)
        area.append(buffer_polygon(landuse_gdf, point, buffer_size))
    df['buildings(sq_km)'] = area

In [31]:
add_buildings(df)

 12%|█████████▏                                                                | 948/7677 [6:53:12<48:52:57, 26.15s/it]


KeyboardInterrupt: 

In [None]:
df.to_csv(r'df_poi_land_buildings.csv')

### Code to plot the shapefile (grey color) and the common area colored blue

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point

# Set the CRS of the landuse_gdf to EPSG:3857
landuse_gdf = gpd.read_file(r"C:\Users\vinee\Downloads\Thesis\SHP Files\LandUse.shp")
landuse_gdf = landuse_gdf.to_crs(epsg=3857)

# Create a shapely point object representing the point of interest
point = Point(long_njf, lat_njf)

# Set the CRS of the point to EPSG:3857
point = gpd.GeoSeries(point, crs='epsg:4326').to_crs(epsg=3857).iloc[0]

# Create a buffer around the point
buffer = point.buffer(1000)  # buffer radius in meters

# Select the landuse polygons that intersect with the buffer
intersected = landuse_gdf[landuse_gdf.intersects(buffer)]

# Create a new geopandas GeoDataFrame with the buffer polygon
buffer_gdf = gpd.GeoDataFrame(geometry=[buffer])

# Plot the buffer and the intersected polygons together
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_aspect('equal')
landuse_gdf.plot(ax=ax, color='lightgray', edgecolor='black')
intersected.plot(ax=ax, color='blue')
#buffer_gdf.plot(ax=ax, color='red')
plt.show()

In [None]:
df.head(10)

In [None]:
df_final = pd.read_csv(r"C:\Users\vinee\Downloads\Thesis\df_thesis.csv")
df_final['From Date'] = df_final['From Date'].apply(lambda x : datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
df_final['len_low_congestion'] = 0
df_final['len_medium_congestion'] = 0
df_final['len_high_congestion'] = 0

### Adding length of roads according to congestion factor

In [None]:
def traffic(lat, long, buffer_radius):
    road_gdf = gpd.read_file(r"C:\Users\vinee\Downloads\Thesis\SHP Files\roads.shp")
    road_gdf = road_gdf.to_crs({'init': 'epsg:3857'})
    point = Point(long, lat)
    point = gpd.GeoSeries(point, crs='epsg:4326').to_crs(epsg=3857).iloc[0]
    buffer = point.buffer(buffer_radius)
    intersected = road_gdf[road_gdf.intersects(buffer)]
    intersected = intersected.reset_index(drop=True)
    return intersected

def date_time(date_obj):
    return datetime.strftime(date_obj, '%d-%b-%Y-%H-%M')

def to_date_time(x):
    return datetime.strptime(x,'%d-%b-%Y-%H')

In [None]:
files = os.listdir(r"E:\HERE")
d = {'00':[], '15':[], '30':[], '45':[]}
for i in range(len(files)):
    dist = 1e3
    key = ''
    for j in d:
        diff = abs(int(files[i][-6:-4]) - int(j))
        if diff < dist:
            key = j
            dist = diff
    d[key].append(i)
l1 = d['00']
l2 = d['15']
l3 = d['30']
l4 = d['45']

lst_files = []
for i in l1:
    lst_files.append(files[i])
for i in l2:
    lst_files.append(files[i])
for i in l3:
    lst_files.append(files[i])
for i in l4:
    lst_files.append(files[i])

In [None]:
df_final = df.copy()
for j in tqdm(range(len(df_final))):
    dt = df_final.loc[j,'From Date']
    lat_mid, long_mid = find_midpoint(lat_njf, long_njf, df_final.loc[j, 'lat'], df_final.loc[j,'long'])
    buffer_size = df_final.dist[j]/2
    buffer_size = buffer_size*(10**3)
    intersected = traffic(lat_mid, long_mid, buffer_size)
    for i in lst_files:
        congestion = []
        str_dt_time = i[:-7]
        dt_time = to_date_time(str_dt_time)
        if dt_time == dt:
            path = "E:\\HERE\\" + i
            df = pd.read_csv(path)
            all_roads = df['Names'].unique()
            for i in range(len(intersected)):
                if(intersected.loc[i,'name'] in all_roads):
                    ff = df[df['Names']==intersected.loc[i,'name']].ff.mean()
                    su = df[df['Names']==intersected.loc[i,'name']].su.mean()
                    if su == 0:
                        congestion.append(0)
                    else:
                        val = ff/su
                        if val > 1:
                            congestion.append(1)
                        else:
                            congestion.append(val)
                else:
                    congestion.append(0)
            intersected['congestion_factor'] = congestion
            congestion_low = intersected[intersected['congestion_factor'] <= 0.3]
            congestion_low = congestion_low[congestion_low.name.isna()]
            congestion_low = congestion_low.reset_index(drop=True)
            congestion_medium = intersected[(intersected['congestion_factor'] > 0.3) & (intersected['congestion_factor'] <= 0.7)]
            congestion_medium = congestion_medium.reset_index(drop=True)
            congestion_high = intersected[intersected['congestion_factor'] > 0.7]
            congestion_high = congestion_high.reset_index(drop=True)
            df_final.at[j,'len_low_congestion'] = congestion_low.geometry.length.sum()/(1000)
            df_final.at[j,'len_medium_congestion'] = congestion_medium.geometry.length.sum()/(1000)
            df_final.at[j,'len_high_congestion'] = congestion_high.geometry.length.sum()/(1000)
            break

In [None]:
df

In [None]:
df_final.to_csv('df4.csv')

In [None]:
df_final[df_final.len_medium_congestion > 0]

In [None]:
df_final

In [None]:
df_final

In [4]:
landuse_gdf = gpd.read_file(r"C:\Users\vinee\Downloads\Thesis\SHP Files\LandUse.shp")
landuse_gdf.head(20)

Unnamed: 0,osm_id,code,fclass,name,geometry
0,24618045,7211,recreation_ground,Metro Walk Mall,"POLYGON ((77.11182 28.72420, 77.11443 28.72711..."
1,24676525,7202,park,,"POLYGON ((77.17220 28.64550, 77.17293 28.64642..."
2,3760138,7203,residential,,"POLYGON ((77.16699 28.50790, 77.16761 28.50990..."
3,25106566,7203,residential,,"POLYGON ((77.11671 28.53854, 77.11768 28.54033..."
4,26585444,7211,recreation_ground,Vasant Kunj Sports Complex,"POLYGON ((77.16021 28.51253, 77.16023 28.51385..."
5,28208892,7210,nature_reserve,Okhla Bird Sanctuary,"MULTIPOLYGON (((77.29694 28.56310, 77.29558 28..."
6,31565401,7203,residential,C block,"POLYGON ((77.18551 28.53852, 77.18558 28.53870..."
7,31565409,7203,residential,A block,"POLYGON ((77.18524 28.53840, 77.18529 28.53852..."
8,31565416,7203,residential,A block,"POLYGON ((77.18459 28.53860, 77.18466 28.53874..."
9,31565445,7203,residential,B block,"POLYGON ((77.18450 28.53897, 77.18456 28.53908..."
