In [1]:
import pandas as pd
import plotly.express as px


In [10]:
# Load the full dataset from a pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
data = pd.read_pickle('df_pickle.pkl')
# Bare last expression: shows the available column names as the cell output.
data.columns

Index(['Year', 'quarter', 'citymarketid_1', 'citymarketid_2', 'city1', 'city2',
       'airportid_1', 'airportid_2', 'airport_1', 'airport_2', 'nsmiles',
       'passengers', 'fare', 'carrier_lg', 'large_ms', 'fare_lg',
       'carrier_low', 'lf_ms', 'fare_low', 'tbl1apk', 'city1_small',
       'city2_small', 'city1_lati', 'city1_long', 'city2_lati', 'city2_long'],
      dtype='object')

In [17]:
# Keep only the fields the map needs: the year, the `city1` name and its
# coordinates, and the passenger count.
cols = ['Year', 'city1_small', 'city1_lati', 'city1_long', 'passengers']
data_subset = data.loc[:, cols]

# Passenger total for each city named in `city1_small`.
city_passengers = data_subset['passengers'].groupby(data_subset['city1_small']).sum()

# "Popularity" is the number of rows in which each city appears.
city_popularity = data_subset['city1_small'].value_counts().to_dict()

# Per-city weight = passenger total x row count, aligned on the city index.
city_weight = city_passengers.mul(
    pd.Series(city_popularity).reindex(city_passengers.index)
)

In [18]:

# Build one map point per row of `data_subset` that has both coordinates.
# This is a vectorised replacement for a row-by-row `iterrows()` loop.
located = data_subset.dropna(subset=['city1_lati', 'city1_long'])

points = pd.DataFrame({
    'city': located['city1_small'],
    'year': located['Year'],
    'lat': located['city1_lati'],
    'lon': located['city1_long'],
    # Look up each row's OWN city weight. Assigning the whole `city_weight`
    # Series here would put a Series object in every cell of the column.
    # Cities missing from `city_weight` get NaN.
    'popularity': located['city1_small'].map(city_weight),
}).reset_index(drop=True)

# Kept as a list of dicts so anything that still reads `final_data` keeps working.
final_data = points.to_dict('records')

# Integer years, sorted so the animation frames are built in chronological order.
# The columns always exist, so an empty result does not raise a KeyError here.
df = points.astype({'year': int}).sort_values('year')

# An ordered categorical pins the frame order explicitly.
year_order = sorted(df['year'].unique().tolist())
df['year'] = pd.Categorical(df['year'], categories=year_order, ordered=True)

# Centre the map on the mean coordinate of all plotted points.
map_center = {
    'lat': df['lat'].mean(),
    'lon': df['lon'].mean()
}

# NOTE(review): `city_weight` is aggregated over all years, so a city's weight
# is identical in every frame; only the set of cities present changes by year.
fig = px.density_mapbox(
    df,
    lat='lat',
    lon='lon',
    z='popularity',  # Per-city weight drives the density
    radius=25,  # Adjust radius based on the data spread
    center=map_center,
    zoom=3,  # Set the zoom level for better visibility
    mapbox_style='open-street-map',
    animation_frame='year',
    title='City Popularity Over Time',
    category_orders={'year': year_order}  # plain list of years, in order
)

# Display the figure
fig.show()
