In [71]:
import os
import pandas as pd
import numpy as np
from datetime import datetime
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import time
import pickle
import json




In [72]:
# Fetch credentials from environment variables so that no secret is ever stored
# in the notebook. Export SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET in the
# environment that launches Jupyter before running this cell.
# os.getenv returns None for a variable that is not set.
# NOTE(review): spotipy is expected to reject missing credentials when the
# client is built in the next cell -- confirm against the installed version.
SPOTIPY_CLIENT_ID = os.getenv('SPOTIPY_CLIENT_ID')
SPOTIPY_CLIENT_SECRET = os.getenv('SPOTIPY_CLIENT_SECRET')

In [73]:
# Spotify API setup using the client-credentials manager built from the
# credentials defined above.
# The client class is referenced as `spotipy.Spotify`: the imports cell only
# brings in `spotipy` and `SpotifyClientCredentials`, so the bare name `Spotify`
# raises NameError on a fresh kernel.
client_credentials_manager = SpotifyClientCredentials(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


In [74]:

def get_top_tracks_for_state(state, limit=50):
    """Search Spotify for tracks, using ``state`` as the ``genre:`` filter value.

    Args:
        state: Value interpolated into the ``genre:`` filter of the search query.
        limit: Maximum number of items requested from the search call.

    Returns:
        The ``items`` list found under ``tracks`` in the search response.

    NOTE(review): the query matches a genre literally named after ``state``;
    nothing here restricts results by geography -- confirm this is intended.
    """
    query = f'track genre:{state}'
    response = sp.search(q=query, type='track', limit=limit)
    tracks_page = response['tracks']
    return tracks_page['items']

def get_artist_genres(artist_id):
    """Return the ``genres`` entry of the Spotify artist record, or ``[]`` on failure.

    This is a best-effort lookup: any ordinary error raised while fetching the
    artist or reading its ``genres`` key is reported and an empty list is
    returned, so one bad artist does not abort a long run.

    Args:
        artist_id: Identifier passed to ``sp.artist``.

    Returns:
        The value stored under ``'genres'`` in the artist record, or an empty
        list when the lookup fails.
    """
    try:
        artist = sp.artist(artist_id)
        return artist['genres']
    except Exception as exc:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
        # SystemExit still stop the notebook; the failure is printed instead of
        # being dropped silently, because a skipped artist changes the counts.
        print(f"Could not fetch genres for artist {artist_id}: {exc}")
        return []

# strptime formats keyed by the length of the release-date string:
# 4 characters -> year only, 7 characters -> year and month.
_RELEASE_DATE_FORMATS = {4: '%Y', 7: '%Y-%m'}


def _parse_release_date(raw_date):
    """Parse a release-date string given as year, year-month, or full date."""
    # Any length other than 4 or 7 is treated as a full date; strptime raises
    # ValueError when the text does not match the chosen format.
    date_format = _RELEASE_DATE_FORMATS.get(len(raw_date), '%Y-%m-%d')
    # These formats carry no UTC offset, so the parsed datetime is already naive.
    return datetime.strptime(raw_date, date_format)


def get_genres_for_timeframe(state, start_date, end_date):
    """Collect artist genres for the state's tracks released within a date window.

    For every track returned by ``get_top_tracks_for_state(state)`` whose album
    release date lies in ``[start_date, end_date]`` (both ends inclusive), the
    genres of the track's first listed artist are appended to the result.

    Args:
        state: Value forwarded to ``get_top_tracks_for_state``.
        start_date: Lower bound of the window; compared against naive datetimes.
        end_date: Upper bound of the window; compared against naive datetimes.

    Returns:
        A flat list of genres, with repeats, in track order.
    """
    all_genres = []
    for track in get_top_tracks_for_state(state):
        try:
            release_date = _parse_release_date(track['album']['release_date'])
            artist_id = track['artists'][0]['id']
        except (ValueError, KeyError, IndexError, TypeError):
            # Skip malformed entries instead of aborting the whole run:
            # unparseable date (ValueError), missing key (KeyError), empty
            # artists list (IndexError), or a None track / None date (TypeError).
            continue

        # The comparison stays outside the try block so that a mismatch between
        # the caller's bounds and the parsed dates still surfaces as an error.
        if start_date <= release_date <= end_date:
            all_genres.extend(get_artist_genres(artist_id))
    return all_genres

def rank_genres_for_states(fema_data, delay=1):
    """Count artist genres per state over each state's overall incident window.

    For every state in ``fema_data`` the window runs from the earliest
    ``incidentBeginDate`` to the latest ``incidentEndDate``; genres are fetched
    with ``get_genres_for_timeframe`` and tallied in a ``Counter``.

    Args:
        fema_data: DataFrame with a ``state`` column and datetime columns
            ``incidentBeginDate`` and ``incidentEndDate``.
        delay: Seconds to pause after each state to avoid hitting API rate
            limits. Defaults to 1; pass 0 to disable the pause.

    Returns:
        Dict mapping each state to a ``Counter`` of genre occurrences, in order
        of the state's first appearance in ``fema_data``.
    """
    genre_rankings = {}
    # One groupby pass replaces a full boolean scan of the frame per state.
    # sort=False keeps first-appearance order; groupby also leaves out rows
    # whose state is missing, so no search is issued for a NaN state.
    for state, state_data in fema_data.groupby('state', sort=False):
        # Strip any timezone so the bounds compare against naive release dates.
        start_date = state_data['incidentBeginDate'].min().replace(tzinfo=None)
        end_date = state_data['incidentEndDate'].max().replace(tzinfo=None)

        print(f"Fetching data for {state} from {start_date} to {end_date}")
        genres = get_genres_for_timeframe(state, start_date, end_date)
        genre_rankings[state] = Counter(genres)

        if delay:
            time.sleep(delay)  # To avoid hitting API rate limits

    return genre_rankings

In [75]:
# Read the processed FEMA disaster declarations CSV into a DataFrame.
fema_csv_path = '../Weather/fema_disaster_declarations_processed.csv'
fema_data = pd.read_csv(fema_csv_path)

In [76]:
# Convert the three FEMA date columns to pandas datetimes in one step.
date_columns = ['declarationDate', 'incidentBeginDate', 'incidentEndDate']
fema_data[date_columns] = fema_data[date_columns].apply(pd.to_datetime)

In [77]:
# Build the state -> genre Counter mapping from the FEMA frame.
# rank_genres_for_states prints one progress line per state as it runs.
print("Ranking genres for states...")
genre_rankings = rank_genres_for_states(fema_data=fema_data)

Ranking genres for states...
Fetching data for NH from 2011-08-26 00:00:00 to 2022-12-25 00:00:00
Fetching data for RI from 2023-04-14 00:00:00 to 2023-04-16 00:00:00
Fetching data for KS from 2016-03-23 00:00:00 to 2023-04-16 00:00:00
Fetching data for CO from 2013-06-11 00:00:00 to 2023-06-23 00:00:00
Fetching data for GA from 2016-11-13 00:00:00 to 2023-08-30 00:00:00
Fetching data for TX from 2014-05-11 00:00:00 to 2023-02-02 00:00:00
Fetching data for OR from 2013-07-19 00:00:00 to 2021-08-31 00:00:00
Fetching data for FL from 2017-04-20 00:00:00 to 2023-04-14 00:00:00
Fetching data for CA from 2012-08-12 00:00:00 to 2023-02-26 00:00:00
Fetching data for SD from 2012-08-31 00:00:00 to 2022-12-25 00:00:00
Fetching data for WA from 2012-08-13 00:00:00 to 2022-11-08 00:00:00
Fetching data for UT from 2013-08-13 00:00:00 to 2021-08-18 00:00:00
Fetching data for ID from 2012-08-15 00:00:00 to 2021-08-17 00:00:00
Fetching data for HI from 2018-08-22 00:00:00 to 2021-08-03 00:00:00
Fetch

In [84]:
# One column per state, one row per genre; a genre a state never produced is NaN.
state_genres = pd.DataFrame(genre_rankings)
# count() ignores NaN, so this is the number of distinct genres found per state.
distinct_genres_per_state = state_genres.count()
distinct_genres_per_state.sort_values(ascending=False).head(10)

CO    62
FL    34
CA    34
HI    29
ID    24
WA    21
AZ    19
GA    19
OR    13
OK    13
dtype: int64

In [87]:
# Persist genre_rankings twice: pickle stores the mapping exactly as it is,
# while the JSON copy converts each state's counts to a plain dict first.
with open('state_genre_rankings.pkl', 'wb') as pickle_file:
    pickle.dump(genre_rankings, pickle_file)
print("Genre rankings saved successfully to 'state_genre_rankings.pkl'")

with open('state_genre_rankings.json', 'w') as json_file:
    plain_rankings = {state: dict(counts) for state, counts in genre_rankings.items()}
    json.dump(plain_rankings, json_file)
print("Genre rankings saved successfully to 'state_genre_rankings.json'")


Genre rankings saved successfully to 'state_genre_rankings.pkl'
Genre rankings saved successfully to 'state_genre_rankings.json'


In [89]:
# Overall genre ranking: fold every state's Counter into a single tally.
all_genres = Counter()
for genres in genre_rankings.values():
    all_genres += genres

print("Top 20 Genres Overall:")
for genre, count in all_genres.most_common(20):
    print(f"{genre}: {count}")

# Long-format table with one row per (state, genre) pair.
# NOTE(review): the keys of genre_rankings are state codes, yet the first
# column is labelled 'County'. The label is kept unchanged in case other code
# relies on it -- confirm, then rename it to 'State'.
results_df = pd.DataFrame([(state, genre, count)
                           for state, genres in genre_rankings.items()
                           for genre, count in genres.items()],
                          columns=['County', 'Genre', 'Count'])
# CSV export is currently disabled; uncomment the next line to write the file.
# results_df.to_csv('county_genre_rankings.csv', index=False)

# The message states only what this cell actually did: no CSV is written and
# no plots are produced here.
print("Analysis complete. Results are available in results_df.")

Top 20 Genres Overall:
wyoming indie: 35
wyoming roots: 35
azontobeats: 29
pop: 23
canadian pop: 19
orchestral soundtrack: 15
coupe-decale: 15
canadian contemporary r&b: 14
soundtrack: 14
musique urbaine kinshasa: 14
ndombolo: 14
r&b francais: 14
rumba congolaise: 14
trap: 13
rap: 12
hip hop: 10
florida rap: 10
cali rap: 10
pop rap: 9
florida drill: 8
Analysis complete. Results saved to CSV and plots generated.
