In [25]:
import pandas as pd
from google.cloud import bigquery
import plotly.express as px

client = bigquery.Client()


def run_query(query):
    """Run a SQL string through the module-level BigQuery client.

    Args:
        query: SQL text handed to ``client.query``.

    Returns:
        A pandas DataFrame built from one dict per result row.
    """
    result_rows = client.query(query).result()
    # Turn every result row into a plain dict so pandas receives a list of records.
    records = list(map(dict, result_rows))
    return pd.DataFrame(records)

# Fetch up to 200 rows from the UserName_HateScann table.
# The SQL has no placeholders, so it is a plain string literal rather than an
# f-string (an f-string would also misinterpret any literal braces added later).
# NOTE(review): there is no ORDER BY, so which 200 rows come back is not
# guaranteed to be the same between runs -- confirm whether that matters.
query = """
    SELECT * 
    FROM `crucial-strata-384013.HateScann_DataSet.UserName_HateScann`
    LIMIT 200
"""
df_queried = run_query(query)

In [26]:
# Display the queried accounts ordered from the highest hate_label score to the lowest.
df_queried.sort_values(by='hate_label', ascending=False)

Unnamed: 0,user_name,name_lastname,nr_followers,tweets_account,tweets_analysed,hate_label,Religion_class,Gender_class,Race_class,Politics_class,Sports_class
16,RatiosCrazy,Successful Ratios,486964,2231,30,2.0,0.11,0.39,0.1,0.34,0.06
20,amandabynes,amanda bynes,2420040,12,30,2.0,0.04,0.27,0.23,0.24,0.21
2,ittybittybabyy1,Luh Baby,78534,18521,30,1.666667,0.07,0.18,0.27,0.43,0.04
13,KyrieIrving,Chief Hélà🤞🏾A11Even Tribe,4749614,3703,30,1.608696,0.18,0.37,0.1,0.21,0.14
5,dillondanis,Dillon Danis,336298,920,30,1.56,0.11,0.33,0.08,0.36,0.13
18,TopGirlKeiko,"Top Girl Keiko, J.D.",61630,69372,30,1.517241,0.19,0.32,0.04,0.37,0.09
10,katyperry,KATY PERRY,107670809,11927,30,1.466667,0.09,0.5,0.08,0.27,0.06
8,ReachTWR,THE WAR ROOM,218313,1989,30,1.36,0.24,0.31,0.06,0.16,0.23
14,jimmyfallon,Jimmy Fallon,50502994,14703,30,1.285714,0.11,0.31,0.07,0.4,0.11
15,rihanna,Rihanna,108252917,10691,30,1.25,0.11,0.5,0.09,0.2,0.11


In [27]:
from sklearn.decomposition import PCA

# The five *_class columns are the inputs that get reduced to three components.
topic_columns = ['Religion_class', 'Gender_class', 'Race_class', 'Politics_class', 'Sports_class']

pca = PCA(n_components=3)
# Fit on the selected columns and keep the transformed values; the next cell
# wraps them in a DataFrame with named component columns.
pca_df = pca.fit_transform(df_queried[topic_columns])

In [28]:
# Name the three component columns and reuse df_queried's index for the PCA frame.
# pd.concat(axis=1) aligns rows by index label, so sharing the index guarantees
# every account is paired with its own components even if df_queried does not
# carry a default 0..n-1 index (e.g. after filtering or sorting upstream).
pca_df = pd.DataFrame(pca_df, columns=['pca_1', 'pca_2', 'pca_3'], index=df_queried.index)
df_combined = pd.concat([df_queried, pca_df], axis=1)

In [29]:
def transform_hate_label(scale, offensive_threshold=0.85, hate_threshold=1.5):
    """Bucket a continuous hate score into a discrete class.

    Args:
        scale: Numeric score to classify.
        offensive_threshold: Lowest score that counts as class 1 (default 0.85).
        hate_threshold: Lowest score that counts as class 2 (default 1.5).

    Returns:
        0 when ``0 <= scale < offensive_threshold``,
        1 when ``offensive_threshold <= scale < hate_threshold``,
        2 when ``scale >= hate_threshold``,
        None when the score is negative or NaN (no class applies).
    """
    # Checking from the highest class down makes each lower bound sufficient on
    # its own; the upper bounds are implied by the earlier checks.
    if scale >= hate_threshold:
        return 2
    if scale >= offensive_threshold:
        return 1
    if scale >= 0:
        return 0
    # Negative scores and NaN (every comparison with NaN is False) end up here.
    return None

In [30]:
# Probe the bucketing helper with a single sample score.
lala = transform_hate_label(scale=1.76)

In [31]:
# Show the Python type of the value returned for the sample score above.
type(lala)

int

In [32]:
# Scale follower counts to a 0-100 range that drives the marker size.
max_followers = df_combined['nr_followers'].max()
min_followers = df_combined['nr_followers'].min()
follower_range = max_followers - min_followers
if follower_range == 0:
    # All accounts have the same follower count: min-max scaling would compute
    # 0 / 0 and fill the column with NaN, so give every marker the same size.
    df_combined['normalized_size'] = 100.0
else:
    df_combined['normalized_size'] = ((df_combined['nr_followers'] - min_followers) / follower_range) * 100
# NOTE(review): min-max scaling gives the least-followed account a size of 0,
# which presumably hides its marker -- confirm whether a minimum size is wanted.

# Bucket the continuous score into the 0/1/2 classes, then derive a readable
# class name for the hover text. A single map replaces three chained replace
# calls; labels outside 0/1/2 (e.g. missing scores) become NaN in the name column.
df_combined['hate_label'] = df_combined['hate_label'].apply(transform_hate_label)
df_combined['hate_label_name'] = df_combined['hate_label'].map({0: "Normal", 1: "Offensive", 2: "Hate"})

# 3D scatter of the PCA components: colour encodes the class, marker size the
# scaled follower count; custom_data feeds the hover template below.
fig = px.scatter_3d(df_combined, x='pca_1', y='pca_2', z='pca_3', color='hate_label',
                    size='normalized_size', hover_name='name_lastname', color_continuous_scale='temps',
                    range_color=[0, 2], size_max=50, custom_data=['hate_label', 'name_lastname', 'nr_followers', 'hate_label_name'])

fig.update_layout(
    scene=dict(
        xaxis_title='PCA 1',
        yaxis_title='PCA 2',
        zaxis_title='PCA 3',
        camera=dict(
            eye=dict(x=1, y=-1.5, z=1)
        )
    ),
    margin=dict(l=0, r=0, b=0, t=0)
)

# customdata[3] is hate_label_name and customdata[2] is nr_followers (order set in custom_data above).
fig.update_traces(opacity=1, marker=dict(symbol='circle'), hovertemplate='<b>%{hovertext}</b><br>Hate Label: %{customdata[3]}<br>Followers: %{customdata[2]:,.0f}')
fig.update_layout(coloraxis_colorbar=dict(title='Hate Label'), coloraxis_colorbar_len=1, coloraxis_colorbar_thickness=15)

fig.show()