In [1]:
import pandas as pd
import plotly.express as px
import folium
import json
from urllib.request import urlopen
import plotly.graph_objects as go
import plotly.figure_factory as fgp
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Reading data

In [2]:
# US county boundaries as GeoJSON (keyed by FIPS code), from the plotly datasets repo.
with urlopen(
    'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
) as geojson_response:
    counties = json.loads(geojson_response.read())

In [3]:
# 2014 US cities table (name, pop, lat, lon) from the plotly datasets repo on GitHub.
cities_data = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv'
)

In [4]:
"""
City_info: general data of the precip data process;
    cols:
    pop:dummy column for testing the scatter_map with scatter_geo

Main_data: Information by date of the prceip max and min temperature (10+ million rows)
"""
city_info = pd.read_csv('./Datak/Processed_data/city_info.csv')
city_info['pop'] =  np.random.randint(1, 40, city_info.shape[0])
city_info = city_info.drop('Unnamed: 0',axis=1)

Main_data = pd.read_pickle('./Datak/Processed_data/compiled_data.pkl')
Main_data = Main_data.drop('Unnamed: 0',axis=1)

# data example 

In [5]:
# Preview the first five station rows.
city_info.head(n=5)

Unnamed: 0,Name,ID,Lat,Lon,pop
0,Lander,USW00024021,42.8153,-108.7261,6
1,Lander,USW00024018,42.8153,-108.7261,8
2,Cheyenne,USW00014897,41.1519,-104.8061,22
3,Cheyenne,USW00094973,41.1519,-104.8061,4
4,Wausau,USW00014991,44.9258,-89.6256,32


In [6]:
# Preview the first five daily observations.
Main_data.head(n=5)

Unnamed: 0,ID,Date,tmax,tmin,prcp,Name,Lat,Lon
0,USC00042863,1894-01-01,60.0,41.0,0.0,Reno,39.4839,-119.7711
1,USC00042863,1894-01-02,58.0,50.0,0.4,Reno,39.4839,-119.7711
2,USC00042863,1894-01-03,57.0,42.0,0.0,Reno,39.4839,-119.7711
3,USC00042863,1894-01-04,53.0,42.0,0.28,Reno,39.4839,-119.7711
4,USC00042863,1894-01-05,50.0,38.0,0.0,Reno,39.4839,-119.7711


In [7]:
# Preview the first five rows of the 2014 US cities table.
cities_data.head(n=5)

Unnamed: 0,name,pop,lat,lon
0,New York,8287238,40.730599,-73.986581
1,Los Angeles,3826423,34.053717,-118.242727
2,Chicago,2705627,41.875555,-87.624421
3,Houston,2129784,29.758938,-95.367697
4,Philadelphia,1539313,39.952335,-75.163789


## Exploratory analysis of the `pop` (population) column of cities_data

In [8]:
# Keep only the more populous cities by applying a lower-quartile cut twice:
#   1. drop every city whose 'pop' is at or below the 25th percentile of all cities;
#   2. recompute the 25th percentile on the survivors and apply the same cut again.
# numeric_only=True is required because the frame also holds the string column
# 'name'; since pandas 2.0 DataFrame.quantile no longer skips non-numeric columns
# by default and raises TypeError instead.
citiesN = cities_data.copy()
city_quantiles = citiesN.quantile([0.25, 0.50, 0.75], numeric_only=True)
citiesN25 = citiesN[citiesN['pop'] > city_quantiles.loc[0.25, 'pop']]
q25 = citiesN25.quantile([0.25], numeric_only=True)
citiesN25 = citiesN25[citiesN25['pop'] > q25.loc[0.25, 'pop']]
citiesN25.head()

Unnamed: 0,name,pop,lat,lon
0,New York,8287238,40.730599,-73.986581
1,Los Angeles,3826423,34.053717,-118.242727
2,Chicago,2705627,41.875555,-87.624421
3,Houston,2129784,29.758938,-95.367697
4,Philadelphia,1539313,39.952335,-75.163789


# Plotting the station points on different types of map to compare how each plotting strategy works

# Preprocessing cities_data for plotting

In [12]:
# Pull the plotting columns of the filtered cities into stand-alone Series.
citiesN25_lat, citiesN25_lon, citiesN25_names = (
    citiesN25[column] for column in ('lat', 'lon', 'name')
)

In [10]:
# Station coordinates and names as stand-alone Series for the map cells below.
city_lat, city_Lon, city_names = city_info['Lat'], city_info['Lon'], city_info['Name']
# Coordinates of the row with index label 1; used as the initial map centre.
d1, d2 = city_lat[1], city_Lon[1]
# Coordinates of the unfiltered 2014 US cities table.
cities_data_lat, cities_data_lon = cities_data['lat'], cities_data['lon']

In [11]:
# Interactive folium map centred on (d1, d2), with one pin marker and one small
# circle per station; both open a popup showing the station name.
m = folium.Map(location=[d1, d2], tiles='OpenStreetMap')
# Iterate over the columns directly rather than range(len(...)) plus label
# lookups, so the loop does not depend on city_info having a default 0..n-1 index.
for station_lat, station_lon, station_name in zip(city_lat, city_Lon, city_info.Name):
    station_location = [station_lat, station_lon]
    station_label = f"{station_name}"
    # fill / fill_color / size are not Marker options (a Marker is styled through
    # its icon), so they are no longer passed.
    folium.Marker(station_location, popup=station_label).add_to(m)
    # 'size' is not a Circle option either; the circle's extent is set by radius.
    folium.Circle(
        station_location,
        radius=50,
        popup=station_label,
        fill=True,
        fill_color="#3186cc",
    ).add_to(m)


# Same stations on a plotly geo scatter: marker size comes from the dummy 'pop'
# column and the station name is attached as text.
fig = go.Figure(
    px.scatter_geo(
        lat=city_lat,
        lon=city_Lon,
        text=city_names,
        size=city_info['pop'],
    )
)
# Restrict the view to the USA and draw latitude grid lines.
fig.update_geos(scope='usa', lataxis_showgrid=True)
