In [1]:
# Imports: third-party (pandas, numpy, requests) and standard library (io)
import pandas as pd
import numpy as np
from io import StringIO
import requests

In [2]:
# Seconds to wait on Google Sheets before giving up, so a stalled connection
# cannot hang the kernel indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def _download(url):
    """GET ``url`` and return the response, failing loudly on an HTTP error.

    Without the status check an error page (4xx/5xx) would be handed to
    ``pd.read_csv`` below and parsed as if it were the spreadsheet.

    Raises:
        requests.exceptions.HTTPError: the server answered with a 4xx/5xx status.
        requests.exceptions.Timeout: no answer within REQUEST_TIMEOUT_SECONDS.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response


# Fetch survey responses data
survey_url = 'https://docs.google.com/spreadsheets/d/1iDsbDwcxj5nygYZASAIIlY7Jo58tzrxK4qc4Xemj6vU/export?format=csv&gid=1697481328'
response_survey = _download(survey_url)
survey_data = response_survey.content
survey_df = pd.read_csv(StringIO(survey_data.decode('utf-8')))

# Fetch content recommendations data
content_url = 'https://docs.google.com/spreadsheets/d/1iDsbDwcxj5nygYZASAIIlY7Jo58tzrxK4qc4Xemj6vU/export?format=csv&gid=0'
response_content = _download(content_url)
content_data = response_content.content
content_df = pd.read_csv(StringIO(content_data.decode('utf-8')))

In [3]:
# Survey column in which each respondent typed their own name.
_name_column = 'Add your name (so I can match up names via vlookup)'

# Names exactly as they appear in the survey responses, in roster order.
# `replace` matches whole values exactly, so the trailing space in
# 'Nicholas ' is significant and must be kept.
_raw_names = [
    'Kevin Bradley',
    'Martin Swoverland',
    'Valerie Velasquez',
    'YURANIS J SALCEDO',
    'Amanda Geerhart',
    'Salah Zaman',
    'Charles',
    'Bibi Persaud',
    'Antonia',
    'Adir Cohen',
    'Nicholas ',
    'Jacqueline Zhinin',
    'Jeremy Simhon',
    'Steven Phillips',
    'Chizzy Exe',
    'Mathew Santos',
    'Ashish Jha',
]

# Raw entries whose spelling differs from the canonical full name.
_corrections = {
    'YURANIS J SALCEDO': 'Yuranis Salcedo',
    'Charles': 'Charles Okunfolami',
    'Antonia': 'Antonia Demeny',
    'Nicholas ': 'Nicholas Alonzo',
}

# Raw name -> canonical name; entries without a correction map to themselves.
mapping = {raw: _corrections.get(raw, raw) for raw in _raw_names}

# Values not present in `mapping` pass through `replace` unchanged.
survey_df['Name'] = survey_df[_name_column].replace(mapping)

In [4]:
def convert_rating(value):
    """Translate one survey rating answer into a numeric score.

    Rules, checked in this order:
      * missing value (per ``pd.isna``)            -> NaN
      * exactly 'Desirable'                        -> 5
      * exactly 'Undesirable'                      -> 1
      * text containing 'Loved It'                 -> 5
      * text containing 'Liked It'                 -> 4
      * text containing 'No basis for judgment'    -> NaN
      * anything else                              -> ``float(value)``, or NaN
        when the value cannot be converted to a float

    Args:
        value: A single cell from a rating column (string, number, or missing).

    Returns:
        An int or float score, or ``np.nan`` when no score applies.
    """
    if pd.isna(value):
        return np.nan
    if value == 'Desirable':
        return 5
    if value == 'Undesirable':
        return 1

    # Substring labels are tested against the string form of the value.
    text = str(value)
    for label, score in (
        ('Loved It', 5),
        ('Liked It', 4),
        ('No basis for judgment', np.nan),
    ):
        if label in text:
            return score

    try:
        return float(value)
    except (TypeError, ValueError):
        # ValueError: non-numeric text; TypeError: a value float() cannot
        # accept at all. Either way there is no usable score.
        return np.nan

# Rating questions are the columns whose header begins with "Rate these";
# replace each one with its numeric score via convert_rating.
rating_columns = [header for header in survey_df.columns if header.startswith('Rate these')]
for rating_column in rating_columns:
    survey_df[rating_column] = survey_df[rating_column].apply(convert_rating)

def _split_choices(answers):
    """Split comma-separated multi-select answers into lists of clean labels.

    Whitespace around each item is stripped, so 'Flight, Invisibility'
    becomes ['Flight', 'Invisibility'] instead of ['Flight', ' Invisibility'];
    otherwise the same option would appear under two different spellings
    depending on its position in the answer. Missing answers stay missing.

    Args:
        answers: A string Series holding one comma-separated answer per row.

    Returns:
        A Series of lists of stripped strings (missing where the answer was missing).
    """
    return answers.str.split(',').apply(
        lambda items: [item.strip() for item in items] if isinstance(items, list) else items
    )


# Split the superhero powers
survey_df['Superhero Powers'] = _split_choices(
    survey_df['Select up to 3 of your top super hero powers']
)

# Split the snack choices
survey_df['Snack Choices'] = _split_choices(
    survey_df['What snack are you looking for at break? (select up to 3)']
)

# Content rows with no author recorded get the placeholder 'Unknown';
# rows that already have an author are left as they are.
author_column = content_df['Author']
content_df['Author'] = author_column.where(author_column.notna(), 'Unknown')

# Encode the content 'Type' labels as integers in a new 'Type_encoded' column.
# The fitted encoder is kept in `label_encoder` for later use.
from sklearn.preprocessing import LabelEncoder

content_types = content_df['Type']
label_encoder = LabelEncoder()
content_df['Type_encoded'] = label_encoder.fit_transform(content_types)


In [5]:
# Join survey responses to the content rows on the canonical 'Name' column.
# This is an inner join: only names present in both frames are kept.
merged_df = pd.merge(
    left=survey_df,
    right=content_df,
    how='inner',
    left_on='Name',
    right_on='Name',
)


In [14]:

# Export step: copy the merged table (without the index) to the system
# clipboard so it can be pasted straight into Google Sheets.
merged_df.to_clipboard(excel=True, index=False)
