In [1]:
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
import numpy as np

from math import radians
from numpy import sin, cos, arcsin, sqrt
from haversine import haversine, Unit

from datetime import datetime
from datetime import timedelta

import seaborn as sns

import pprint

In [2]:
lyft_df = pd.read_csv("data/lyft_df.csv").reset_index(drop=True)
bird_df = pd.read_csv("data/bird_df.csv").reset_index(drop=True)
lime_df = pd.read_csv("data/lime_df.csv").reset_index(drop=True)
all_df = pd.read_csv("data/months_concat.csv").reset_index(drop=True)

In [3]:
lyft_df.columns

Index(['Unnamed: 0', 'zip', 'rides', 'scooters', 'average rides'], dtype='object')

In [28]:
lyft_df.head()

Unnamed: 0.1,Unnamed: 0,zip,rides,scooters,average rides
0,0,37203,52635,1669,0.031709
1,1,37201,23014,1606,0.069784
2,2,37219,8722,1456,0.166934
3,3,37213,7929,1404,0.177072
4,4,37208,5082,1207,0.237505


In [4]:
all_df = all_df[['zip', 'rides', 'scooters', 'average rides']]

In [5]:
all_df['perc_of_total_rides'] = (all_df.rides / sum(all_df.rides))

In [6]:
all_df.head()

Unnamed: 0,zip,rides,scooters,average rides,perc_of_total_rides
0,37203,206923,8379,0.040493,0.450541
1,37201,82044,7689,0.093718,0.178637
2,37219,37407,6877,0.183843,0.081448
3,37212,30235,5507,0.18214,0.065832
4,37204,26101,3288,0.125972,0.056831


In [7]:
all_df['existing_alloc'] = (all_df.perc_of_total_rides * 5860)

In [8]:
all_df.head()

Unnamed: 0,zip,rides,scooters,average rides,perc_of_total_rides,existing_alloc
0,37203,206923,8379,0.040493,0.450541,2640.168743
1,37201,82044,7689,0.093718,0.178637,1046.814537
2,37219,37407,6877,0.183843,0.081448,477.282816
3,37212,30235,5507,0.18214,0.065832,385.773945
4,37204,26101,3288,0.125972,0.056831,333.027476


In [35]:
all_df['proposed_alloc'] = (all_df.existing_alloc * (all_df['average rides']*3))

In [36]:
all_df

Unnamed: 0,zip,rides,scooters,average rides,perc_of_total_rides,existing_alloc,proposed_alloc
0,37203,206923,8379,0.040493,0.450541,2640.168743,320.727622
1,37201,82044,7689,0.093718,0.178637,1046.814537,294.31611
2,37219,37407,6877,0.183843,0.081448,477.282816,263.234736
3,37212,30235,5507,0.18214,0.065832,385.773945,210.794488
4,37204,26101,3288,0.125972,0.056831,333.027476,125.856596
5,37208,19313,5272,0.272977,0.042051,246.418131,201.799263
6,37206,17504,4628,0.264397,0.038112,223.336766,177.148518
7,37213,17275,4942,0.286078,0.037613,220.414913,189.16767
8,37210,10694,4405,0.411913,0.023284,136.44672,168.612624
9,37240,3765,2200,0.584329,0.008198,48.03833,84.210618


In [37]:
sum(all_df.proposed_alloc)

2220.710551584338

In [12]:
def month_avgs():
    """This will import a unified csv file, create zip code column, filter data, remove unecessary columns, create a unique month df."""

In [44]:
all_df.to_csv("data/all_df.csv")

In [13]:
zip_code_avg = pd.read_pickle("data/zip_code_avg.pkl")

In [14]:
zip_code_avg.head(2)

Unnamed: 0,companyName,sumdID,Month,zip
0,Bird,Powered9EAJL,5,37203
1,Lyft,Powered296631,5,37203


In [15]:
pd.value_counts(zip_code_avg.Month)

5    198085
6    162406
7     98716
Name: Month, dtype: int64

In [16]:
pd.value_counts(zip_code_avg.companyName)

Lime             210018
Lyft             112978
Bird              93749
SPIN              33181
JUMP               5998
Gotcha             2690
Bolt Mobility       593
Name: companyName, dtype: int64

In [17]:
pd.value_counts(zip_code_avg.zip)

37203    206897
37201     82031
37219     37399
37212     30229
37204     26097
37208     19313
37206     17498
37213     17270
37210     10693
37240      3765
37232      2355
37207      2229
37209      1624
37215       453
37228       423
37216       368
37205       302
37211        85
37217        45
37013        39
37214        39
37218        23
37115        15
37221        10
37138         2
37220         2
37072         1
Name: zip, dtype: int64

In [18]:
zip_code_avg.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 459207 entries, 0 to 172539
Data columns (total 4 columns):
companyName    459207 non-null object
sumdID         459207 non-null object
Month          459207 non-null int64
zip            459207 non-null object
dtypes: int64(1), object(3)
memory usage: 17.5+ MB


In [26]:
May_37203 = zip_code_avg.loc[(zip_code_avg.zip == '37203') & (zip_code_avg.Month == 5)]
May_37201 = zip_code_avg.loc[(zip_code_avg.zip == '37201') & (zip_code_avg.Month == 5)]
May_37219 = zip_code_avg.loc[(zip_code_avg.zip == '37219') & (zip_code_avg.Month == 5)]
May_37212 = zip_code_avg.loc[(zip_code_avg.zip == '37212') & (zip_code_avg.Month == 5)]
May_37204 = zip_code_avg.loc[(zip_code_avg.zip == '37204') & (zip_code_avg.Month == 5)]
May_37208 = zip_code_avg.loc[(zip_code_avg.zip == '37208') & (zip_code_avg.Month == 5)]
May_37206 = zip_code_avg.loc[(zip_code_avg.zip == '37206') & (zip_code_avg.Month == 5)]
May_37213 = zip_code_avg.loc[(zip_code_avg.zip == '37213') & (zip_code_avg.Month == 5)]
May_37210 = zip_code_avg.loc[(zip_code_avg.zip == '37210') & (zip_code_avg.Month == 5)]
May_37240 = zip_code_avg.loc[(zip_code_avg.zip == '37240') & (zip_code_avg.Month == 5)]
May_37232 = zip_code_avg.loc[(zip_code_avg.zip == '37232') & (zip_code_avg.Month == 5)]
May_37207 = zip_code_avg.loc[(zip_code_avg.zip == '37207') & (zip_code_avg.Month == 5)]
May_37209 = zip_code_avg.loc[(zip_code_avg.zip == '37209') & (zip_code_avg.Month == 5)]
May_37215 = zip_code_avg.loc[(zip_code_avg.zip == '37215') & (zip_code_avg.Month == 5)]
May_37228 = zip_code_avg.loc[(zip_code_avg.zip == '37228') & (zip_code_avg.Month == 5)]
May_37216 = zip_code_avg.loc[(zip_code_avg.zip == '37216') & (zip_code_avg.Month == 5)]
May_37205 = zip_code_avg.loc[(zip_code_avg.zip == '37205') & (zip_code_avg.Month == 5)]
May_37211 = zip_code_avg.loc[(zip_code_avg.zip == '37211') & (zip_code_avg.Month == 5)]
May_37217 = zip_code_avg.loc[(zip_code_avg.zip == '37217') & (zip_code_avg.Month == 5)]
May_37013 = zip_code_avg.loc[(zip_code_avg.zip == '37013') & (zip_code_avg.Month == 5)]
May_37214 = zip_code_avg.loc[(zip_code_avg.zip == '37214') & (zip_code_avg.Month == 5)]
May_37218 = zip_code_avg.loc[(zip_code_avg.zip == '37218') & (zip_code_avg.Month == 5)]
May_37115 = zip_code_avg.loc[(zip_code_avg.zip == '37115') & (zip_code_avg.Month == 5)]
May_37221 = zip_code_avg.loc[(zip_code_avg.zip == '37221') & (zip_code_avg.Month == 5)]
May_37220 = zip_code_avg.loc[(zip_code_avg.zip == '37220') & (zip_code_avg.Month == 5)]
May_37138 = zip_code_avg.loc[(zip_code_avg.zip == '37138') & (zip_code_avg.Month == 5)]
May_37072 = zip_code_avg.loc[(zip_code_avg.zip == '37072') & (zip_code_avg.Month == 5)]

June_37203 = zip_code_avg.loc[(zip_code_avg.zip == '37203') & (zip_code_avg.Month == 5)]
June_37201 = zip_code_avg.loc[(zip_code_avg.zip == '37201') & (zip_code_avg.Month == 5)]
June_37219 = zip_code_avg.loc[(zip_code_avg.zip == '37219') & (zip_code_avg.Month == 5)]
June_37212 = zip_code_avg.loc[(zip_code_avg.zip == '37212') & (zip_code_avg.Month == 5)]
June_37204 = zip_code_avg.loc[(zip_code_avg.zip == '37204') & (zip_code_avg.Month == 5)]
June_37208 = zip_code_avg.loc[(zip_code_avg.zip == '37208') & (zip_code_avg.Month == 5)]
June_37206 = zip_code_avg.loc[(zip_code_avg.zip == '37206') & (zip_code_avg.Month == 5)]
June_37213 = zip_code_avg.loc[(zip_code_avg.zip == '37213') & (zip_code_avg.Month == 5)]
June_37210 = zip_code_avg.loc[(zip_code_avg.zip == '37210') & (zip_code_avg.Month == 5)]
June_37240 = zip_code_avg.loc[(zip_code_avg.zip == '37240') & (zip_code_avg.Month == 5)]
June_37232 = zip_code_avg.loc[(zip_code_avg.zip == '37232') & (zip_code_avg.Month == 5)]
June_37207 = zip_code_avg.loc[(zip_code_avg.zip == '37207') & (zip_code_avg.Month == 5)]
June_37209 = zip_code_avg.loc[(zip_code_avg.zip == '37209') & (zip_code_avg.Month == 5)]
June_37215 = zip_code_avg.loc[(zip_code_avg.zip == '37215') & (zip_code_avg.Month == 5)]
June_37228 = zip_code_avg.loc[(zip_code_avg.zip == '37228') & (zip_code_avg.Month == 5)]
June_37216 = zip_code_avg.loc[(zip_code_avg.zip == '37216') & (zip_code_avg.Month == 5)]
June_37205 = zip_code_avg.loc[(zip_code_avg.zip == '37205') & (zip_code_avg.Month == 5)]
June_37211 = zip_code_avg.loc[(zip_code_avg.zip == '37211') & (zip_code_avg.Month == 5)]
June_37217 = zip_code_avg.loc[(zip_code_avg.zip == '37217') & (zip_code_avg.Month == 5)]
June_37013 = zip_code_avg.loc[(zip_code_avg.zip == '37013') & (zip_code_avg.Month == 5)]
June_37214 = zip_code_avg.loc[(zip_code_avg.zip == '37214') & (zip_code_avg.Month == 5)]
June_37218 = zip_code_avg.loc[(zip_code_avg.zip == '37218') & (zip_code_avg.Month == 5)]
June_37115 = zip_code_avg.loc[(zip_code_avg.zip == '37115') & (zip_code_avg.Month == 5)]
June_37221 = zip_code_avg.loc[(zip_code_avg.zip == '37221') & (zip_code_avg.Month == 5)]
June_37220 = zip_code_avg.loc[(zip_code_avg.zip == '37220') & (zip_code_avg.Month == 5)]
June_37138 = zip_code_avg.loc[(zip_code_avg.zip == '37138') & (zip_code_avg.Month == 5)]
June_37072 = zip_code_avg.loc[(zip_code_avg.zip == '37072') & (zip_code_avg.Month == 5)]

July_37203 = zip_code_avg.loc[(zip_code_avg.zip == '37203') & (zip_code_avg.Month == 5)]
July_37201 = zip_code_avg.loc[(zip_code_avg.zip == '37201') & (zip_code_avg.Month == 5)]
July_37219 = zip_code_avg.loc[(zip_code_avg.zip == '37219') & (zip_code_avg.Month == 5)]
July_37212 = zip_code_avg.loc[(zip_code_avg.zip == '37212') & (zip_code_avg.Month == 5)]
July_37204 = zip_code_avg.loc[(zip_code_avg.zip == '37204') & (zip_code_avg.Month == 5)]
July_37208 = zip_code_avg.loc[(zip_code_avg.zip == '37208') & (zip_code_avg.Month == 5)]
July_37206 = zip_code_avg.loc[(zip_code_avg.zip == '37206') & (zip_code_avg.Month == 5)]
July_37213 = zip_code_avg.loc[(zip_code_avg.zip == '37213') & (zip_code_avg.Month == 5)]
July_37210 = zip_code_avg.loc[(zip_code_avg.zip == '37210') & (zip_code_avg.Month == 5)]
July_37240 = zip_code_avg.loc[(zip_code_avg.zip == '37240') & (zip_code_avg.Month == 5)]
July_37232 = zip_code_avg.loc[(zip_code_avg.zip == '37232') & (zip_code_avg.Month == 5)]
July_37207 = zip_code_avg.loc[(zip_code_avg.zip == '37207') & (zip_code_avg.Month == 5)]
July_37209 = zip_code_avg.loc[(zip_code_avg.zip == '37209') & (zip_code_avg.Month == 5)]
July_37215 = zip_code_avg.loc[(zip_code_avg.zip == '37215') & (zip_code_avg.Month == 5)]
July_37228 = zip_code_avg.loc[(zip_code_avg.zip == '37228') & (zip_code_avg.Month == 5)]
July_37216 = zip_code_avg.loc[(zip_code_avg.zip == '37216') & (zip_code_avg.Month == 5)]
July_37205 = zip_code_avg.loc[(zip_code_avg.zip == '37205') & (zip_code_avg.Month == 5)]
July_37211 = zip_code_avg.loc[(zip_code_avg.zip == '37211') & (zip_code_avg.Month == 5)]
July_37217 = zip_code_avg.loc[(zip_code_avg.zip == '37217') & (zip_code_avg.Month == 5)]
July_37013 = zip_code_avg.loc[(zip_code_avg.zip == '37013') & (zip_code_avg.Month == 5)]
July_37214 = zip_code_avg.loc[(zip_code_avg.zip == '37214') & (zip_code_avg.Month == 5)]
July_37218 = zip_code_avg.loc[(zip_code_avg.zip == '37218') & (zip_code_avg.Month == 5)]
July_37115 = zip_code_avg.loc[(zip_code_avg.zip == '37115') & (zip_code_avg.Month == 5)]
July_37221 = zip_code_avg.loc[(zip_code_avg.zip == '37221') & (zip_code_avg.Month == 5)]
July_37220 = zip_code_avg.loc[(zip_code_avg.zip == '37220') & (zip_code_avg.Month == 5)]
July_37138 = zip_code_avg.loc[(zip_code_avg.zip == '37138') & (zip_code_avg.Month == 5)]
July_37072 = zip_code_avg.loc[(zip_code_avg.zip == '37072') & (zip_code_avg.Month == 5)]


In [23]:
month_avgs(zip_code_avg)

In [27]:
July_37203.head()

Unnamed: 0,companyName,sumdID,Month,zip
0,Bird,Powered9EAJL,5,37203
1,Lyft,Powered296631,5,37203
2,Bird,Powered7S2UU,5,37203
3,Bird,PoweredZIIVX,5,37203
5,Bird,PoweredEJF1K,5,37203


In [2]:
test = pd.read_csv('data/all_df.csv')

In [3]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 7 columns):
zip                   27 non-null int64
rides                 27 non-null int64
scooters              27 non-null int64
average rides         27 non-null float64
perc_total_rides      27 non-null object
existing_alloc        27 non-null float64
allocation_per_zip    27 non-null int64
dtypes: float64(2), int64(4), object(1)
memory usage: 1.6+ KB


In [8]:
rides = test.rides

In [7]:
import pandas as pd
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
 
# Set the dimension of the figure
my_dpi=96
plt.figure(figsize=(2600/my_dpi, 1800/my_dpi), dpi=my_dpi)
 
# read the data (on the web)
data = pd.read_csv('http://python-graph-gallery.com/wp-content/uploads/TweetSurfData.csv', sep=";")
 
# Make the background map
m=Basemap(llcrnrlon=-180, llcrnrlat=-65,urcrnrlon=180,urcrnrlat=80)
m.drawmapboundary(fill_color='#A6CAE0', linewidth=0)
m.fillcontinents(color='grey', alpha=0.3)
m.drawcoastlines(linewidth=0.1, color="white")
 
# prepare a color for each point depending on the continent.
data['labels_enc'] = pd.factorize(data['homecontinent'])[0]
 
# Add a point per position
m.scatter(data['homelon'], data['homelat'], s=data['n']/6, alpha=0.4, c=data['labels_enc'], cmap="Set1")
 
# copyright and source data info
plt.text( -170, -58,'Where people talk about #Surf\n\nData collected on twitter by @R_Graph_Gallery during 300 days\nPlot realized with Python and the Basemap library', ha='left', va='bottom', size=9, color='#555555' )
 
# Save as png
plt.savefig('#315_Tweet_Surf_Bubble_map1.png', bbox_inches='tight')


ModuleNotFoundError: No module named 'mpl_toolkits.basemap'

In [9]:
rides

0     206923
1      82044
2      37407
3      30235
4      26101
5      19313
6      17504
7      17275
8      10694
9       3765
10      2355
11      2229
12      1624
13       453
14       424
15       368
16       302
17        85
18        45
19        39
20        39
21        23
22        15
23        10
24         2
25         2
26         1
Name: rides, dtype: int64