# Group Project - Social Media Usage

In [40]:
import pandas as pd
import dash
from dash import dcc, html
from dash.dash_table import DataTable


In [41]:
# Load the survey extract. Later cells read its `dimension`, `platform`,
# `category` and `percentage` columns.
social_media_csv = "../data/Social media.csv"
data = pd.read_csv(social_media_csv)


In [45]:
def _wide_by_category(long_df):
    """Pivot one dimension's long rows into a wide, display-ready table.

    Args:
        long_df: Frame with `platform`, `category` and `percentage` columns,
            holding at most one row per (platform, category) pair.

    Returns:
        A new frame with one row per platform, a regular `platform` column,
        and one column per category holding the `percentage` values.

    Raises:
        ValueError: Propagated from `DataFrame.pivot` when a
            (platform, category) pair occurs more than once.
    """
    wide = long_df.pivot(index='platform', columns='category', values='percentage').reset_index()
    # Drop the leftover "category" axis label so it does not surface as a header.
    wide.columns.name = None
    return wide


data2 = data.copy()

# Split the long table into one sub-frame per value of `dimension`.
dfs = {}
for i in data2['dimension'].unique().tolist():
    dfs[i] = data2[data2['dimension'] == i]

dim = dfs.keys()

# Copy each slice so the clean-up below never writes back into `data2`.
df_age = dfs['Age'].copy()
df_gender = dfs['Gender'].copy()
df_income = dfs['Income'].copy()
df_political = dfs['Political Affiliation'].copy()
df_race = dfs['Race & Ethnicity'].copy()

# Normalise a few raw values before reshaping (only the age and income frames are touched).
df_age.replace('<1', 0, inplace=True)
df_age.replace('Ages 18-29', '18-29', inplace=True)
df_income.replace('Less than $30,000', '<$30,000', inplace=True)

# Strip spaces from platform names in every dimension frame.
df_list = [df_age, df_gender, df_income, df_political, df_race]
for df in df_list:
    df['platform'] = df['platform'].str.replace(' ', '')

# Reshape long -> wide: one row per platform, one column per category.
# (`df_list` keeps referring to the long frames, as before.)
df_age = _wide_by_category(df_age)
df_gender = _wide_by_category(df_gender)
df_income = _wide_by_category(df_income)
df_political = _wide_by_category(df_political)
df_race = _wide_by_category(df_race)

# Put the income brackets in ascending order instead of the pivot's default column order.
df_income = df_income[['platform', '<$30,000', '$30,000- $69,999', '$70,000- $99,999', '$100,000+']]

In [46]:
# Scrape the HTML tables from the Pew fact sheet; the tables at positions 5 and 6
# are used as the education and community breakdowns.
pew_fact_sheet_url = "https://www.pewresearch.org/internet/fact-sheet/social-media/?tabItem=428a8f10-3b74-4b36-ad2d-183a4ba27180"
web = pd.read_html(pew_fact_sheet_url)
education, community = web[5], web[6]

# Name the unlabeled first column `platform`; shorten one education header.
education.rename(columns={'Unnamed: 0': 'platform', 'Some college': 'College'}, inplace=True)
community.rename(columns={'Unnamed: 0': 'platform'}, inplace=True)

# Strip spaces from platform names, matching the clean-up applied to the CSV-based frames.
for scraped in (education, community):
    scraped['platform'] = scraped['platform'].str.replace(' ', '')

# Sorted copies that feed the dashboard tables.
df_education = education.sort_values(by='platform')
df_community = community.sort_values(by='platform')

In [49]:
# Dashboard
app = dash.Dash(__name__)

# Footnote block reused under the tables that need no dimension-specific note.
common_caption = html.Div([
    html.P(note, style={'fontSize': '0.8em', 'textAlign': 'left'})
    for note in (
        "Note: Respondents who did not give an answer are not shown.",
        "Source: Survey of U.S. adults conducted May 19 - Sep 5, 2023.",
    )
])

# Cell and header styling shared by every DataTable in the layout.
common_style_cell = dict(
    textAlign='center',
    fontSize='14px',
    fontFamily='Arial',
)

common_style_header = dict(
    backgroundColor='lightgrey',
    fontWeight='bold',
    fontFamily='Arial',
    fontSize='14px',
    textAlign='center',
)

# Layout of the app
def _table_tab(label, table_id, frame, caption):
    """Build one tab that shows `frame` as a DataTable followed by a caption.

    Args:
        label: Text shown on the tab.
        table_id: Component id assigned to the DataTable.
        frame: DataFrame to display; every column becomes a table column.
        caption: Dash component rendered underneath the table.

    Returns:
        A `dcc.Tab` whose children are the table and the caption.
    """
    return dcc.Tab(
        label=label,
        style={'fontSize': '14px'},
        children=[
            DataTable(
                id=table_id,
                columns=[{"name": col, "id": col} for col in frame.columns],
                data=frame.to_dict('records'),
                style_cell=common_style_cell,
                style_header=common_style_header,
            ),
            caption,
        ],
    )


# The race/ethnicity table carries its own footnotes instead of the common caption.
_race_caption = html.Div([
    html.P("* Estimates for Asian adults are representative of English speakers only.",
           style={'fontSize': '0.8em', 'textAlign': 'left'}),
    html.P("Note: White, Black and Asian adults include those who report being only one race and are not Hispanic. Hispanics are of any race. Respondents who did not give an answer are not shown.",
           style={'fontSize': '0.8em', 'textAlign': 'left'}),
    html.P("Source: Survey of U.S. adults conducted May 19-Sept. 5, 2023.",
           style={'fontSize': '0.8em', 'textAlign': 'left'}),
])

app.layout = html.Div(
    style={'fontFamily': 'Arial',
           'backgroundColor': '#F5F5F5',
           'padding': '10px'},
    children=[
        html.H1("Social Media Usage by % U.S. adults",
                style={'fontSize': '30px',
                       'textAlign': 'center'}),

        # One tab per demographic dimension, in display order.
        dcc.Tabs([
            _table_tab('Age', 'table-age', df_age, common_caption),
            _table_tab('Gender', 'table-gender', df_gender, common_caption),
            _table_tab('Income levels', 'table-income', df_income, common_caption),
            _table_tab('Race and Ethnicity', 'table-race', df_race, _race_caption),
            _table_tab('Political Affiliation', 'table-political', df_political, common_caption),
            _table_tab('Education level', 'table-education', df_education, common_caption),
            _table_tab('Community', 'table-community', df_community, common_caption),
        ]),
    ],
)


# Run the app
if __name__ == '__main__':
    # `app.run` is the supported entry point; `run_server` is the legacy
    # spelling and is not available in newer Dash releases.
    app.run(debug=True)

In [48]:
from dash.dependencies import Input, Output
import plotly.express as px


# Work on an explicit copy: `pd.DataFrame(data2)` does not copy by default, so the
# column assignments below could otherwise leak back into `data2`.
df2 = data2.copy()
df2['category'] = df2['category'].replace({'Ages 18-29': '18-29'})
df2['platform'] = df2['platform'].str.replace(' ', '')



# Scrape the Pew fact sheet; tables 5 and 6 are used as the education and community breakdowns.
web = pd.read_html("https://www.pewresearch.org/internet/fact-sheet/social-media/?tabItem=428a8f10-3b74-4b36-ad2d-183a4ba27180")
education = web[5]
community = web[6]

# Rename columns for consistency and clean up platform names
education.rename(columns={'Unnamed: 0': 'platform', 'Some college': 'College'}, inplace=True)
community.rename(columns={'Unnamed: 0': 'platform'}, inplace=True)
education['platform'] = education['platform'].str.replace(' ', '')
community['platform'] = community['platform'].str.replace(' ', '')

# Convert `education` and `community` to long format (melt) so they share
# the platform / category / percentage / dimension columns used from `df2`.
df_education = education.melt(id_vars='platform', var_name='category', value_name='percentage')
df_education['dimension'] = 'Education'

df_community = community.melt(id_vars='platform', var_name='category', value_name='percentage')
df_community['dimension'] = 'Community'

df3 = pd.concat([df2, df_education, df_community], ignore_index=True)

# Remember which rows were reported as '<1' BEFORE converting to numbers, so the
# display label is driven by the source value rather than by the 0.5 stand-in
# (a genuine 0.5 would otherwise be relabelled '<1' as well).
below_one = df3['percentage'] == '<1'

# Plot '<1' as 0.5 so the bar is still visible, and keep the column numeric.
df3['percentage'] = df3['percentage'].replace({'<1': '0.5'}).astype(float)

# Text shown on the bars: the numeric value, except '<1' where the source said so.
df3['display_percentage'] = df3['percentage'].astype(object).mask(below_one, '<1')


app = dash.Dash(__name__)

# Dropdown choices come straight from the distinct values present in `df3`.
platform_options = [
    {'label': name, 'value': name}
    for name in df3['platform'].unique()
]
dimension_options = [
    {'label': name, 'value': name}
    for name in df3['dimension'].unique()
]

# Page: title, two selectors, and the bar chart filled in by the callback.
app.layout = html.Div(
    children=[
        html.H1("Platform Data Dashboard"),

        html.Label("Select a Platform:"),
        dcc.Dropdown(
            id="Platform-dropdown",
            options=platform_options,
            placeholder="Select a platform",
        ),

        html.Label("Select a Dimension:"),
        dcc.Dropdown(
            id="Dimension-dropdown",
            options=dimension_options,
            placeholder="Select a dimension",
        ),

        dcc.Graph(id="platform-graph"),
    ]
)

@app.callback(
    Output("platform-graph", "figure"),
    [Input("Platform-dropdown", "value"),
     Input("Dimension-dropdown", "value")]
)
def update_graph(selected_platform, selected_dimension):
    """Redraw the bar chart for the chosen platform and dimension.

    Args:
        selected_platform: Current value of the platform dropdown.
        selected_dimension: Current value of the dimension dropdown.

    Returns:
        A bar figure with one bar per category, or an empty dict when no
        rows of `df3` match both selections.
    """
    platform_match = df3['platform'] == selected_platform
    dimension_match = df3['dimension'] == selected_dimension
    subset = df3[platform_match & dimension_match]

    # Nothing matches the current selections: hand back an empty figure spec.
    if subset.empty:
        return {}

    # Bars are sized by the numeric percentage but annotated with the display label.
    return px.bar(
        subset,
        x='category',
        y='percentage',
        title=f'{selected_dimension} distribution for {selected_platform}',
        labels={'display_percentage': 'Percentage Category', 'category': 'Category'},
        text='display_percentage',
        color='category',
    )

if __name__ == '__main__':
    # `app.run` is the supported entry point; `run_server` is the legacy
    # spelling and is not available in newer Dash releases.
    app.run(debug=True)
