In [10]:
import pandas as pd
import pandas as pd
import numpy as np
import warnings
from sklearn.manifold import TSNE
import plotly.express as px
from helpers import load_data, get_embedding
import tqdm
import plotly.graph_objects as go
import json

# Suppress every warning for the rest of the session (this also hides NumPy
# runtime warnings raised by later cells).
warnings.filterwarnings("ignore")

# Directory handed to helpers.load_data.
DATA_PATH = 'data/'

# load_data returns a mapping; the keys read below are the ones this notebook uses.
loaded_data = load_data(DATA_PATH)

# Top-level datasets.
character_metadata, movie_metadata, plot_summaries = (
    loaded_data['character_metadata'],
    loaded_data['movie_metadata'],
    loaded_data['plot_summaries'],
)
embeddings, combined_plot_summaries = (
    loaded_data['embeddings'],
    loaded_data['combined_plot_summaries'],
)

# Pre-computed per-location analysis: location names, their movies, and the
# embeddings of those movies, once for cities and once for countries.
city_country_analysis = loaded_data['city_country_analysis']
cities, countries = (
    city_country_analysis['cities'],
    city_country_analysis['countries'],
)
cities_movies, countries_movies = (
    city_country_analysis['cities_movies'],
    city_country_analysis['countries_movies'],
)
embeddings_of_movies_in_cities, embeddings_of_movies_in_countries = (
    city_country_analysis['embeddings_of_movies_in_cities'],
    city_country_analysis['embeddings_of_movies_in_countries'],
)

In [11]:
# Concepts that each location's movies are compared against.
general_terms = [
    'Drugs', 'Love', 'War', 'Poverty', 'Comedy',
    'Happiness', 'Sadness', 'Gang', 'Hippies', 'Guns',
]

# term -> embedding returned by helpers.get_embedding (one call per term, in list order).
embeddings_of_general_terms = dict(zip(general_terms, map(get_embedding, general_terms)))

In [12]:
# Switch for the rest of the notebook: True runs the analysis over `cities`,
# False runs it over `countries`.
USE_CITIES = True

In [13]:
# Build similarity_movie_to_term[location][term]: the mean dot product between
# the term's embedding and the embeddings of all movies linked to the location.
# (The dot product equals cosine similarity only if the embeddings are
# unit-normalised -- TODO confirm for helpers.get_embedding.)

# Choose the active location set once instead of duplicating the whole loop
# for cities and countries.
if USE_CITIES:
    locations, embeddings_by_location = cities, embeddings_of_movies_in_cities
else:
    locations, embeddings_by_location = countries, embeddings_of_movies_in_countries

# Convert term embeddings to arrays once; they are loop-invariant.
term_vectors = {
    term: np.asarray(term_embedding)
    for term, term_embedding in embeddings_of_general_terms.items()
}

similarity_movie_to_term = {}
for location in locations:
    movie_embeddings = np.asarray(embeddings_by_location[location])
    # Every term starts at 0.0 so each location carries the full set of keys.
    per_term = {general_term: 0.0 for general_term in general_terms}
    for term, term_vector in term_vectors.items():
        # Take the embedding width from the term vector rather than
        # hard-coding 1536, so other embedding models work unchanged.
        movie_matrix = movie_embeddings.reshape(-1, term_vector.shape[0])
        cosine_similarities = np.dot(movie_matrix, term_vector)
        # A location without movie embeddings has no defined mean; store NaN
        # explicitly (np.mean of an empty array would also give NaN, but only
        # via a runtime warning).
        per_term[term] = np.mean(cosine_similarities) if cosine_similarities.size else np.nan
    similarity_movie_to_term[location] = per_term

In [14]:
# For every term, rank the active locations by ascending similarity and store
# the three parallel lists the strip plot needs.
precomputed_data = {}

for term in general_terms:
    if USE_CITIES:
        ranked = [(city, similarity_movie_to_term[city][term]) for city in cities]
    else:
        ranked = [(country, similarity_movie_to_term[country][term]) for country in countries]
    # sorted() is stable, so locations with equal scores keep their input order.
    ranked = sorted(ranked, key=lambda entry: entry[1])

    precomputed_data[term] = {
        "sorted_locations": [name for name, _ in ranked],
        "similarity_scores": [score for _, score in ranked],
        # Hover label: location name plus the score rounded to two decimals.
        "hover_text": [f"{city}: {score:.2f}" for city, score in ranked],
    }



In [15]:

# Term displayed when the figure first renders.
initial_term = general_terms[0]
initial_view = precomputed_data[initial_term]

# One-dimensional strip plot: x is the similarity score, y is a constant so
# all markers sit on a single horizontal line.
strip_trace = go.Scatter(
    x=initial_view["similarity_scores"],
    y=[1] * len(initial_view["similarity_scores"]),
    mode='markers',
    text=initial_view["hover_text"],
    hoverinfo='text',
)
fig = go.Figure(data=[strip_trace])

# One dropdown entry per term. 'update' takes [trace changes, layout changes]:
# trace attributes are wrapped in a list (one entry per trace), layout
# attributes are passed as-is.
term_buttons = []
for term in general_terms:
    view = precomputed_data[term]
    trace_changes = {
        'x': [view["similarity_scores"]],
        'text': [view["hover_text"]],
    }
    layout_changes = {
        'title': f'Similarity to the Word "{term}" for {"cities" if USE_CITIES else "countries"}',
        'xaxis.tickvals': view["similarity_scores"],
        'xaxis.ticktext': view["sorted_locations"],
    }
    term_buttons.append(dict(
        label=term,
        method='update',
        args=[trace_changes, layout_changes],
    ))

# The x axis is ticked at each score and labelled with the location name.
x_axis = dict(
    title='Similarity Score',
    tickvals=initial_view["similarity_scores"],
    ticktext=initial_view["sorted_locations"],
)
# The y axis carries no information, so hide its decorations.
y_axis = dict(
    showgrid=False,
    zeroline=False,
    showticklabels=False,
)

fig.update_layout(
    title_text=f'Similarity to the Word "{initial_term}" for {"cities" if USE_CITIES else "countries"}',
    xaxis=x_axis,
    yaxis=y_axis,
    hovermode='closest',
    showlegend=False,
    updatemenus=[dict(
        buttons=term_buttons,
        direction='down',
        showactive=True,
    )],
    hoverlabel=dict(
        bgcolor="white",
        font_size=16,
        font_family="Rockwell"
    ),
)

# Range slider for zooming into dense regions of the score axis.
fig.update_xaxes(rangeslider=dict(visible=True))

fig.show()

In [16]:
import googlemaps
import pandas as pd
import os

# The API key is read from the environment; a missing variable raises KeyError.
gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])


def _geocode_location(name):
    """Return (lat, lng) of the first geocoding hit for `name`, or (None, None) if there is none."""
    geocode_result = gmaps.geocode(name)
    if not geocode_result:
        return (None, None)
    location = geocode_result[0]["geometry"]["location"]
    return (location["lat"], location["lng"])


# Only the dictionary for the active location set is filled; the other stays empty.
city_coordinates = {}
country_coordinates = {}

if USE_CITIES:
    city_coordinates = {city: _geocode_location(city) for city in cities}
else:
    country_coordinates = {country: _geocode_location(country) for country in countries}



In [17]:
from sklearn.preprocessing import MinMaxScaler

# term -> DataFrame with one row per location (coordinates, raw similarity,
# location name, and similarity rescaled to [0, 1] within that term).
term_data = {}

scaler = MinMaxScaler()

# Resolve the active location set once rather than per column.
if USE_CITIES:
    location_names, location_coordinates, name_column = cities, city_coordinates, 'City'
else:
    location_names, location_coordinates, name_column = countries, country_coordinates, 'Country'

for term in general_terms:
    term_frame = pd.DataFrame({
        'Latitude': [location_coordinates[name][0] for name in location_names],
        'Longitude': [location_coordinates[name][1] for name in location_names],
        'Similarity': [similarity_movie_to_term[name][term] for name in location_names],
    })
    term_frame[name_column] = location_names

    # The scaler is refit for every term, so the scaled values are relative to
    # that term's own min and max.
    term_frame['Similarity_Scaled'] = scaler.fit_transform(term_frame[['Similarity']])
    term_data[term] = term_frame



In [18]:
import plotly.graph_objects as go

# Term displayed when the map first renders.
initial_term = general_terms[0]
df = term_data[initial_term]

# Only one location set is plotted, so the column and the title wording follow
# USE_CITIES (the title previously always read "Cities and Countries").
location_column = 'City' if USE_CITIES else 'Country'
location_kind = "cities" if USE_CITIES else "countries"


def _map_hover_text(frame):
    """Return the hover labels "<location>: <similarity>" for one term's DataFrame."""
    return frame[location_column] + ": " + frame['Similarity'].astype(str)


def _map_title(term):
    """Return the figure title for `term`, naming the location set actually shown."""
    return f"Map for Similarity to the Word '{term}' for {location_kind}"


# Initial map: one marker per location, coloured by the per-term scaled similarity.
fig = go.Figure(go.Scattergeo(
    lat=df['Latitude'],
    lon=df['Longitude'],
    text=_map_hover_text(df),
    marker=dict(
        color=df['Similarity_Scaled'],
        line_color='rgb(40,40,40)',
        line_width=0.5,
        sizemode='diameter'
    ),
    hoverinfo='text'
))

fig.update_layout(
    title_text=_map_title(initial_term),
    geo=dict(
        showland=True,
        landcolor='rgb(217, 217, 217)',
        projection_type='natural earth'
    ),
    updatemenus=[dict(
        buttons=[dict(
            label=term,
            # 'update' changes trace data (first dict) and layout (second dict).
            method='update',
            args=[{'lat': [term_data[term]['Latitude']],
                   'lon': [term_data[term]['Longitude']],
                   'text': [_map_hover_text(term_data[term])],
                   'marker.color': [term_data[term]['Similarity_Scaled']]},
                  {'title': _map_title(term)}]
        ) for term in general_terms],
        direction='down',
        showactive=True
    )]
)

fig.show()

# Save to plots/2-map.html; create the directory first so a fresh checkout
# without plots/ does not fail on write.
os.makedirs("plots", exist_ok=True)
fig.write_html("plots/2-map.html")