In [None]:
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the Blue Bikes station list. header=1 makes the second line of the
# file the column header, so the line above it is skipped.
bikes_df = pd.read_csv(
    'datasets/current_bluebikes_stations.csv',
    header=1,
)
bikes_df.head()

In [None]:
# Number of station rows per district.
bikes_df['District'].value_counts()

In [None]:
# Show every column of the row at position 35 (0-based) as a Series.
bikes_df.iloc[35]

In [None]:
# Column dtypes as inferred by read_csv.
bikes_df.dtypes

In [None]:
# Count of missing values in each column.
bikes_df.isnull().sum()

In [None]:
# Frequency of each distinct value in the 'Public' column.
bikes_df['Public'].value_counts()

In [None]:
# Store the station text columns with pandas' dedicated `string` dtype.
feat = ['Name', 'District']

# Cast all listed columns in one assignment.
bikes_df[feat] = bikes_df[feat].astype('string')

In [None]:
# Re-check the dtypes after the string conversion above.
bikes_df.dtypes

In [None]:
# Bar chart of the total dock count in each district.
# estimator="sum" adds up 'Total docks' over all stations of a district; the
# barplot default (mean) would show the average docks per station instead,
# which contradicts the "Total number of docks" title.
# errorbar=None switches the error bars off directly instead of bootstrapping
# a zero-width confidence interval.
sns.barplot(
    x='District',
    y='Total docks',
    data=bikes_df,
    estimator="sum",
    errorbar=None,
    palette="Blues",
)
plt.xlabel('District name')
plt.ylabel('Number of bike station docks')
plt.title('Total number of docks in each district for blue bike stations')

# Draw the district tick labels vertically.
plt.xticks(rotation="vertical")

plt.show()

In [None]:
# Read the January 2023 trip records and preview the first rows.
trip_df = pd.read_csv(
    'datasets/202301-bluebikes-tripdata.csv',
)
trip_df.head()

In [None]:
# Count of missing values in each trip column.
trip_df.isnull().sum()

In [None]:
# Column dtypes as inferred by read_csv.
trip_df.dtypes

In [None]:
# Store the trip text columns with pandas' dedicated `string` dtype.
feat = ['start station name', 'end station name', 'usertype']

# Cast all listed columns in one assignment.
trip_df[feat] = trip_df[feat].astype('string')

In [None]:
# Turn the trip start/stop timestamp columns into datetime64[ns] values.

number_feat = ['starttime', 'stoptime']

# Convert both columns in one assignment.
trip_df[number_feat] = trip_df[number_feat].astype('datetime64[ns]')

In [None]:
# Re-check the dtypes after the string and datetime conversions above.
trip_df.dtypes

In [None]:
import geopandas 
import geodatasets
import pandas as pd
import numpy as np

In [None]:
# Read the county boundary shapefile into a GeoDataFrame and preview it.
county = geopandas.read_file(
    "datasets/ne_10m_admin_2_counties/ne_10m_admin_2_counties.shp"
)
county.head()

In [None]:
# Numeric 'fips' column: drop the first two characters of each FIPS string
# and parse the remainder as an integer.
county['fips'] = [int(code[2:]) for code in county['FIPS']]

In [None]:
# Keep the counties whose ISO_3166_2 code is "US-25"
# (presumably Massachusetts, going by the variable name; verify against the data).
ma_df = county.loc[county['ISO_3166_2'] == "US-25"]
ma_df.plot()

In [None]:
# County names kept for the Boston-area base map.
boston_counties = ['Middlesex', 'Norfolk', 'Suffolk', 'Essex']

# Keep the rows of ma_df whose NAME is one of those counties.
new_df = ma_df.loc[ma_df['NAME'].isin(boston_counties)]

new_df

In [None]:
# Outline-only view of the selected counties: white fill, black borders.
new_df.plot(edgecolor="black", color='white')

In [None]:
import geopandas 
import geodatasets

In [None]:
from shapely.geometry import Point
#Create the geospatial data from columns

# Work on a copy so the geometry column is not added to trip_df itself.
trip_df1 = trip_df.copy()

# One point per trip from the start-station coordinates (x = longitude,
# y = latitude). points_from_xy builds the whole geometry array in a single
# vectorised call instead of creating a shapely Point per row in a Python loop.
trip_df1['geometry'] = geopandas.points_from_xy(
    trip_df1['start station longitude'],
    trip_df1['start station latitude'],
)
trip_df1.head()

In [None]:
from geopandas import GeoDataFrame
# Create the GeoDataFrame
# Wrap the trips in a GeoDataFrame and declare the coordinate reference system.
# The geometry is built from longitude/latitude columns, so it is tagged as
# EPSG:4326 (assumes the published coordinates are WGS84 degrees; confirm
# against the data source). Without a CRS the points cannot be reprojected or
# spatially joined with the county layer.
gdf = GeoDataFrame(trip_df1, geometry='geometry', crs="EPSG:4326")

# Plotting: county outlines first; the returned Axes is reused for the points.
base = new_df.plot(color = 'white', edgecolor = 'black')

# Plot trip points on the same Axes as small green triangles.
gdf.plot(ax=base, marker='^', color='green', markersize=2)

plt.title('Trip Start Points')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.show()

In [None]:
# Split any multi-part geometries into one row per part; index_parts=True
# adds an extra index level numbering the parts of each original row.
point_gdf = gdf.explode(index_parts = True)
point_gdf.head()

In [None]:
# Copy of the point layer without the two timestamp columns
# (presumably so the layer serialises cleanly for the interactive map; confirm).
point_gdf1 = point_gdf.drop(columns=['starttime', 'stoptime'])

In [None]:
# Display the point layer after dropping the timestamp columns.
point_gdf1

In [None]:
import folium

In [None]:
# Interactive map of the selected counties: coloured by NAME, with the name
# shown on hover (tooltip) and on click (popup), plus a legend.
m = new_df.explore(column="NAME", tooltip='NAME', popup=['NAME'], legend=True)
m

In [None]:
# Quick static plot of the exploded point layer with default styling.
point_gdf.plot()

In [None]:
# Interactive county base map, coloured and labelled by county name.
m = new_df.explore(
    column = "NAME",
    tooltip = 'NAME',
    popup = ['NAME'],
    legend = True
)

# Trips that share a start station repeat the same name and coordinates.
# Dropping duplicate (name, longitude, latitude) rows keeps every distinct
# marker but avoids drawing one marker per trip, which would bloat the map
# and the saved HTML.
station_points = point_gdf1.drop_duplicates(
    subset=['start station name', 'start station longitude', 'start station latitude']
)

# Add the stations as a named layer on top of the county map.
station_points.explore(
    m = m,
    color = "blue",
    marker_kwds = dict(radius = 3, fill = True),
    tooltip = 'start station name',
    name = "stations"
)

# Extra base-map option (hidden by default) and a control to toggle layers.
folium.TileLayer("CartoDB positron", show = False).add_to(m)
folium.LayerControl().add_to(m)

# End the cell with the map itself so the notebook renders it; add_to()
# returns the control that was added, not the map.
m

In [None]:
# Write the interactive map to an HTML file in the working directory.
m.save('figure.html')

In [None]:
# Display the trip table that now carries the geometry column.
trip_df1

In [None]:
# Display the trip table again (same expression as the previous cell).
trip_df1

In [None]:
import altair as alt
# Base Altair chart bound to the trip table; no mark or encoding is set yet.
chart = alt.Chart(data=trip_df1)

In [None]:
# Scatter plot of the trip start stations with a click (point) selection.

# Altair embeds the data in the chart spec and enforces a default row limit,
# which a full month of trips presumably exceeds; the shapely `geometry`
# column may also not be serialisable. Reduce the trips to one row per
# distinct start station, keeping only the three columns the chart uses.
station_cols = ['start station name', 'start station longitude', 'start station latitude']
start_stations = trip_df1[station_cols].drop_duplicates()

# selection_point is the replacement for the deprecated selection_single and
# belongs to the same API generation as add_params used below.
selection = alt.selection_point()

alt.Chart(start_stations).mark_circle().encode(
    # zero=False fits each axis to the coordinate range instead of stretching
    # it to include 0, which would squash all stations into one corner.
    x=alt.X('start station longitude:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('start station latitude:Q', scale=alt.Scale(zero=False)),
    tooltip=['start station name:N'],
).add_params(
    selection
).properties(width=700, height=300,
             # The previous title described an unrelated income/health chart.
             title='Blue Bikes trip start station locations')