# GENDER AMBIGUOUS NAMES - VISUALIZATION

### WHEN FRIENDS OF MINE WERE EXPECTING THEIR FIRST BORN, WE INEVITABLY SPOKE ABOUT NAMES... 
_WHERE WOULD YOU FIND INSPIRATION? WOULD IT NEED TO BE SOMETHING UNIQUE? ARE THERE NAMES YOU'D WANT TO AVOID?_ 

#### IT TURNED OUT THEY WANTED A NAME WHICH WOULD WORK REGARDLESS OF WHETHER THEY WERE TO HAVE A BOY OR GIRL. I KNEW ABOUT THE USA'S SOCIAL SECURITY DATABASE AND THOUGHT THAT IT COULD COME IN HANDY FOR SOMETHING LIKE THIS...

#### Below is a scatter plot that lets the user plot a number of random names, choose how modern the names are by selecting a year range, and choose how gender-ambiguous the names are by selecting a range of ambiguity scores from 0 to 100...

In [1]:
import pandas as pd
import plotly.express as px
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [29]:
def _build_name_stats(csv_path):
    """Load the raw name counts and return one row per (year, name).

    The CSV is read with the columns 'name', 'year', 'gender' and
    'frequency'; the gender values 'F' and 'M' become the count columns
    'feme_freq' and 'male_freq'.

    Returns:
        DataFrame with the columns year, name, ttl_freq, feme_freq, feme_pc,
        male_freq, male_pc, ambig_scr and weblink, sorted by year
        (descending) and then name (ascending).
    """
    raw = pd.read_csv(csv_path)

    # Pivot to reduce name & year to one row, with the female and male
    # quantities as new columns.  The raw rows need no pre-sorting: the
    # pivot groups and sums them regardless of their order.
    stats = raw.pivot_table(index=['name', 'year'],
                            columns='gender', values='frequency',
                            aggfunc='sum').reset_index()
    stats = stats.sort_values(by=['year', 'name'], ascending=[False, True])
    stats = stats.reset_index(drop=True)
    # A name missing for one gender in a given year has a count of zero.
    stats = stats.fillna(0)
    stats = stats.rename(columns={'M': 'male_freq', 'F': 'feme_freq'})

    stats['ttl_freq'] = stats['feme_freq'] + stats['male_freq']
    stats['feme_pc'] = (stats['feme_freq'] / stats['ttl_freq'] * 100).round(2)
    stats['male_pc'] = (stats['male_freq'] / stats['ttl_freq'] * 100).round(2)

    # Ambiguity score: twice the female share when it is below 50 %,
    # otherwise twice the male share (0 = one gender only, ~100 = even split).
    stats['ambig_scr'] = stats['feme_pc'].where(stats['feme_pc'] < 50,
                                                stats['male_pc']) * 2
    stats['weblink'] = stats['name'].apply(
        lambda x: f'<a href="https://babynames.com/name/{x}">{x}</a>')

    # Reorder last, so every column above is assigned on the frame itself
    # rather than on a column slice of it.
    return stats[['year', 'name', 'ttl_freq',
                  'feme_freq', 'feme_pc', 'male_freq', 'male_pc',
                  'ambig_scr', 'weblink']]


def _sample_names(stats, min_year, max_year, min_ambig_scr, max_ambig_scr, smpl_size):
    """Filter the per-year stats, aggregate them per name and draw a random sample.

    Rows are kept when both the year and that year's ambiguity score lie
    inside the (inclusive) ranges.  Counts are summed per name; percentages
    and the ambiguity score are averaged over the kept years.

    Raises:
        ValueError: if no row matches the filters.
    """
    in_range = stats[stats['year'].between(min_year, max_year) &
                     stats['ambig_scr'].between(min_ambig_scr, max_ambig_scr)]
    if in_range.empty:
        raise ValueError(
            f'No names found for years {min_year}-{max_year} with an '
            f'ambiguity score between {min_ambig_scr} and {max_ambig_scr}.')

    per_name = in_range.pivot_table(
        index='name',
        values=['ttl_freq', 'feme_freq', 'male_freq',
                'feme_pc', 'male_pc', 'ambig_scr', 'weblink'],
        aggfunc={'ttl_freq': 'sum', 'feme_freq': 'sum', 'male_freq': 'sum',
                 'feme_pc': 'mean', 'male_pc': 'mean', 'ambig_scr': 'mean',
                 'weblink': 'min'})
    per_name = per_name.round(2)

    # Popularity rank (1 = highest total count) among the matching names.
    ranking = per_name['ttl_freq'].rank(ascending=False, method='dense')
    per_name['ranking'] = (ranking.astype(int).astype(str)
                           + ' of ' + str(len(per_name)))

    # DataFrame.sample raises when asked for more rows than exist, so never
    # request more names than the filters left over.
    picked = per_name.sample(min(smpl_size, len(per_name)))
    return picked.reset_index()


def random_name_viz(min_year, max_year, min_ambig_scr, max_ambig_scr, smpl_size,
                    *, csv_path='../data/baby_names_all_years.csv',
                    renderer='notebook_connected'):
    """Show a scatter plot of randomly sampled gender-ambiguous names.

    Args:
        min_year: first year to include (inclusive).
        max_year: last year to include (inclusive).
        min_ambig_scr: lowest per-year ambiguity score to include (inclusive).
        max_ambig_scr: highest per-year ambiguity score to include (inclusive).
        smpl_size: number of names to plot; if fewer names match the
            filters, all matching names are plotted instead.
        csv_path: location of the name-count CSV file.
        renderer: plotly renderer passed to ``fig.show``.

    Raises:
        ValueError: if no name matches the year / ambiguity filters.
    """
    stats = _build_name_stats(csv_path)
    data = _sample_names(stats, min_year, max_year,
                         min_ambig_scr, max_ambig_scr, smpl_size)

    ## Visualisation
    fig = px.scatter(data, x='ambig_scr', y='name',
                     color='male_pc', size='ttl_freq',
                     hover_name='name',
                     hover_data={'ambig_scr': True, 'ttl_freq': True, 'ranking': True,
                                 'male_pc': True, 'feme_pc': True,
                                 'weblink': True},
                     color_continuous_scale='RdBu', range_color=[0, 100],
                     labels={'name': 'Name', 'ambig_scr': 'Ambiguity Score',
                             'ttl_freq': 'Count', 'ranking': 'Ranking',
                             'male_pc': '% as Male',
                             'feme_pc': '% as Female',
                             'weblink': 'More Info'})

    # Replace the marker sizes with three times the total count and outline
    # every marker in black.
    fig.update_traces(marker=dict(size=data['ttl_freq']*3, line=dict(color='black', width=1)))

    fig.update_layout(title='Random Name Generator',
                      xaxis_title='Gender Ambiguity Score',
                      yaxis_title='Names',
                      coloraxis_colorbar=dict(title='Male %'),
                      height=600,
                      plot_bgcolor='#f3f4f4',
                      clickmode='event+select')

    # Always show the full 0-100 score range, whatever the sample contains.
    fig.update_xaxes(range=[0, 100])

    fig.show(renderer=renderer)

### In the parentheses below, input... 
- Year Start, 
- Year End, 
- Min. Ambiguity, 
- Max. Ambiguity, 
- Number of Names to Output

In [30]:
# Plot 8 random names from 1920-1922 whose ambiguity score is between 46 and 92.
random_name_viz(min_year=1920, max_year=1922,
                min_ambig_scr=46, max_ambig_scr=92,
                smpl_size=8)