## Visualização da distribuição geográfica e uso das estações

In [2]:
# importa a biblioteca pandas
import pandas as pd

In [3]:
# importa a biblioteca glob 
from glob import glob

# Collect every CSV in the working directory whose name starts with "20".
# Sorting makes the concatenation order deterministic across runs.
files = sorted(glob('20*.csv'))

# Fail early with an explicit message: with an empty list, pd.concat would
# only report the opaque "No objects to concatenate".
if not files:
    raise FileNotFoundError(
        "No CSV files matching '20*.csv' were found in the working directory"
    )

content = []
# Columns that are imported and used in the analyses below.
colunas = ["rideable_type", "start_station_name", "end_station_name", "start_lat", "end_lat", "start_lng",
           "end_lng", "member_casual"]

# Read each file (selected columns only) and collect the frames in "content".
for filename in files:
    df = pd.read_csv(filename, usecols=colunas)
    content.append(df)

# Stack all frames into a single DataFrame with a fresh 0..n-1 index.
data = pd.concat(content, ignore_index=True)

In [4]:
# Preview the first five rows of the combined DataFrame.
data.head(n=5)

Unnamed: 0,rideable_type,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,member_casual
0,classic_bike,State St & Pearson St,Southport Ave & Waveland Ave,41.897448,-87.628722,41.94815,-87.66394,member
1,docked_bike,Dorchester Ave & 49th St,Dorchester Ave & 49th St,41.805772,-87.592464,41.805772,-87.592464,casual
2,docked_bike,Loomis Blvd & 84th St,Loomis Blvd & 84th St,41.741487,-87.65841,41.741487,-87.65841,casual
3,classic_bike,Honore St & Division St,Southport Ave & Waveland Ave,41.903119,-87.673935,41.94815,-87.66394,member
4,docked_bike,Loomis Blvd & 84th St,Loomis Blvd & 84th St,41.741487,-87.65841,41.741487,-87.65841,casual


In [5]:
# Count the missing values in each column.
data.isna().sum()

rideable_type              0
start_station_name    745376
end_station_name      796247
start_lat                  0
start_lng                  0
end_lat                 4716
end_lng                 4716
member_casual              0
dtype: int64

In [6]:
# Drop every row in which the start or the end station name is missing.
new_data = data.dropna(
    axis='index',
    how='any',
    subset=['start_station_name', 'end_station_name'],
)

In [7]:
# Re-count the missing values per column after the rows were dropped.
new_data.isna().sum()

rideable_type         0
start_station_name    0
end_station_name      0
start_lat             0
start_lng             0
end_lat               0
end_lng               0
member_casual         0
dtype: int64

In [9]:
# importa a biblioteca folium 
import folium

In [10]:
# Create the base map of the city of Chicago; the centre is [latitude, longitude].
m = folium.Map(
    location=[41.897448, -87.628722],
    tiles='OpenStreetMap',
    zoom_start=10,
)

In [11]:
# Display the base map (station markers are only added in a later cell).
m

In [12]:
# Group the rides by start station: one row per station holding the mean start
# coordinates and the number of rides that began there (non-null member_casual
# entries), stored in the "count" column.
start_stations = (
    new_data
    .groupby(['start_station_name'])
    .agg(
        start_lat=('start_lat', 'mean'),
        start_lng=('start_lng', 'mean'),
        count=('member_casual', 'count'),
    )
)

In [13]:
# Display the per-station summary (mean coordinates and ride count).
start_stations

Unnamed: 0_level_0,start_lat,start_lng,count
start_station_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2112 W Peterson Ave,41.991174,-87.683592,838
351,41.930000,-87.780000,1
63rd St Beach,41.780951,-87.576256,1934
900 W Harrison St,41.874753,-87.649807,8818
Aberdeen St & Jackson Blvd,41.877732,-87.654801,11651
...,...,...,...
Woodlawn Ave & 55th St,41.795267,-87.596471,7063
Woodlawn Ave & 75th St,41.759169,-87.595757,43
Woodlawn Ave & Lake Park Ave,41.814090,-87.597009,1603
Yates Blvd & 75th St,41.758764,-87.566440,177


In [14]:
# Add one circle marker per start station, with a radius proportional to the
# number of rides that started there.
#
# The three columns are iterated in parallel instead of indexing with
# .iloc[i][...], which would build a full row Series three times per station.
#
# NOTE(review): add_to(m) appends to the existing map object, so re-running this
# cell without recreating `m` appears to stack duplicate markers — confirm.

# Divisor that scales a ride count down to a marker radius (2000 rides -> radius 1).
RIDES_PER_RADIUS_UNIT = 2000

for lat, lng, n_rides in zip(start_stations['start_lat'],
                             start_stations['start_lng'],
                             start_stations['count']):
    folium.CircleMarker(
        location=[lat, lng],
        popup='Start',
        radius=float(n_rides) / RIDES_PER_RADIUS_UNIT,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

In [15]:
# Display the map again, now with the station circle markers added above.
m