In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import seaborn as sns
from matplotlib.ticker import PercentFormatter

In [None]:
# Master table for the notebook (columns referenced below: 'Day', 'Source', 'Target', 'SNS Source').
Hate_Universe = pd.read_csv(filepath_or_buffer='./Data/Hate_Universe.csv')

In [None]:
# Ten-day windows (start inclusive, end exclusive) around each event.
# NOTE(review): bounds are compared against 'Day' exactly as loaded from the CSV —
# presumably ISO 'YYYY-MM-DD' strings; confirm.
Jan6 = Hate_Universe.loc[Hate_Universe['Day'].ge('2021-01-01') & Hate_Universe['Day'].lt('2021-01-11')]
Nov7 = Hate_Universe.loc[Hate_Universe['Day'].ge('2020-11-01') & Hate_Universe['Day'].lt('2020-11-11')]

In [None]:
# Keep only rows that have a 'Target' and parse 'Day' into real datetimes.
Jan6 = Jan6.dropna(subset=['Target']).assign(Day=lambda frame: pd.to_datetime(frame['Day']))
Nov7 = Nov7.dropna(subset=['Target']).assign(Day=lambda frame: pd.to_datetime(frame['Day']))

# Number of rows (links) observed on each day of the window.
daily_counts_Jan6 = Jan6.groupby('Day').size()
daily_counts_Nov7 = Nov7.groupby('Day').size()

# Percentage change of every day's count against the first day present in the window.
baseline_Jan6 = daily_counts_Jan6.iloc[0]
baseline_Nov7 = daily_counts_Nov7.iloc[0]
relative_increase_Jan6 = daily_counts_Jan6.sub(baseline_Jan6).div(baseline_Jan6).mul(100.00)
relative_increase_Nov7 = daily_counts_Nov7.sub(baseline_Nov7).div(baseline_Nov7).mul(100.00)

In [None]:
# Daily percentage change in hate links over the January 2021 window,
# with the day of the Capitol attack marked by a dashed red line.
capitol_attack_day = pd.Timestamp('2021-01-06')

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(relative_increase_Jan6.index, relative_increase_Jan6.values, marker='o', linestyle='-', alpha=0.7, markersize=6)
ax.axvline(x=capitol_attack_day, color='red', linestyle='--', alpha=0.7)
ax.text(capitol_attack_day, relative_increase_Jan6.loc[capitol_attack_day], 'Capitol Attack', color='red', verticalalignment='bottom', horizontalalignment='right')
ax.set_title('Increase in Hate Links Relative to Jan 1, 2021')
ax.set_xlabel('Day')
ax.set_ylabel('Increase in Number of Hate Links (%)')
ax.tick_params(axis='x', labelrotation=45)

# Use a formatter rather than set_yticklabels(get_yticks()): fixed label strings are
# detached from the tick locations and go stale when the layout/limits change afterwards.
ax.yaxis.set_major_formatter(PercentFormatter(decimals=0))

ax.grid(False)  # Removing gridlines
fig.tight_layout()
plt.show()

In [None]:
# Daily percentage change in hate links over the November 2020 window,
# with two dashed red markers: Nov 3 and Nov 7.
election_day = pd.Timestamp('2020-11-03')
declaration_day = pd.Timestamp('2020-11-07')

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(relative_increase_Nov7.index, relative_increase_Nov7.values, marker='o', linestyle='-', alpha=0.7, markersize=6)

ax.axvline(x=election_day, color='red', linestyle='--', alpha=0.7)
ax.axvline(x=declaration_day, color='red', linestyle='--', alpha=0.7)
ax.text(election_day, relative_increase_Nov7.loc[election_day], 'Election Day', color='red', verticalalignment='bottom', horizontalalignment='right')
ax.text(declaration_day, relative_increase_Nov7.loc[declaration_day], 'President Elect Declared', color='red', verticalalignment='bottom', horizontalalignment='right')

ax.set_title('Increase in Hate Links Relative to Nov 1, 2020')
ax.set_xlabel('Day')
ax.set_ylabel('Increase in Number of Hate Links (%)')
ax.tick_params(axis='x', labelrotation=45)

# Use a formatter rather than set_yticklabels(get_yticks()): fixed label strings are
# detached from the tick locations and go stale when the layout/limits change afterwards.
ax.yaxis.set_major_formatter(PercentFormatter(decimals=0))

ax.grid(False)  # Removing gridlines
fig.tight_layout()
plt.show()

In [None]:

# Election comparison: Nov 1-2 (pre) vs. Nov 3-4 (post); end bounds are exclusive.
pre_election = Hate_Universe.loc[Hate_Universe['Day'].ge('2020-11-01') & Hate_Universe['Day'].lt('2020-11-03')]
post_election = Hate_Universe.loc[Hate_Universe['Day'].ge('2020-11-03') & Hate_Universe['Day'].lt('2020-11-05')]

# Capitol-attack comparison: Jan 1-5 (pre) vs. Jan 6-10 (post); end bounds are exclusive.
pre_Jan6 = Hate_Universe.loc[Hate_Universe['Day'].ge('2021-01-01') & Hate_Universe['Day'].lt('2021-01-06')]
post_Jan6 = Hate_Universe.loc[Hate_Universe['Day'].ge('2021-01-06') & Hate_Universe['Day'].lt('2021-01-11')]


In [None]:
# Rows without a 'Target' cannot become an edge, so keep only rows where it is present.
pre_election = pre_election[pre_election['Target'].notna()]
post_election = post_election[post_election['Target'].notna()]
pre_Jan6 = pre_Jan6[pre_Jan6['Target'].notna()]
post_Jan6 = post_Jan6[post_Jan6['Target'].notna()]

In [None]:
# Undirected Source-Target graphs for the two election windows.
Graph_pre_Nov7 = nx.from_pandas_edgelist(pre_election, source='Source', target='Target', create_using=nx.Graph)
Graph_post_Nov7 = nx.from_pandas_edgelist(post_election, source='Source', target='Target', create_using=nx.Graph)

In [None]:

def calculate_percentage_change(pre, post):
    """Return the change from ``pre`` to ``post`` as a percentage of ``pre``.

    Works on plain numbers as well as NumPy/pandas objects (element-wise).

    Parameters
    ----------
    pre : baseline value(s).
    post : value(s) to compare against the baseline.

    Returns
    -------
    ``((post - pre) / pre) * 100``. A zero baseline given as a plain Python
    number returns ``nan`` instead of raising ``ZeroDivisionError``; NumPy and
    pandas inputs keep their own zero-division result (``inf``/``nan``).

    Note: the denominator is signed, so a negative baseline flips the sign of
    the result.
    """
    try:
        return ((post - pre) / pre) * 100
    except ZeroDivisionError:
        # The change relative to a zero baseline is undefined.
        return float('nan')

# --- Network metrics for the pre-election graph (Graph_pre_Nov7) ---
pre_density = nx.density(Graph_pre_Nov7)
pre_cliques = [clique for clique in nx.find_cliques(Graph_pre_Nov7)]
pre_max_clique_size = max(map(len, pre_cliques))
# "Communities" in this notebook are connected components.
pre_num_communities = nx.number_connected_components(Graph_pre_Nov7)
pre_largest_community = max(map(len, nx.connected_components(Graph_pre_Nov7)))
pre_clustering_coefficient = nx.average_clustering(Graph_pre_Nov7)
pre_assortativity = nx.degree_assortativity_coefficient(Graph_pre_Nov7)

# --- Same metrics for the post-election graph (Graph_post_Nov7) ---
post_density = nx.density(Graph_post_Nov7)
post_cliques = [clique for clique in nx.find_cliques(Graph_post_Nov7)]
post_max_clique_size = max(map(len, post_cliques))
post_num_communities = nx.number_connected_components(Graph_post_Nov7)
post_largest_community = max(map(len, nx.connected_components(Graph_post_Nov7)))
post_clustering_coefficient = nx.average_clustering(Graph_post_Nov7)
post_assortativity = nx.degree_assortativity_coefficient(Graph_post_Nov7)

In [None]:
# One (label, pre value, post value) triple per network metric.
metric_rows = [
    ('Density', pre_density, post_density),
    ('Number of Cliques', len(pre_cliques), len(post_cliques)),
    ('Max Clique Size', pre_max_clique_size, post_max_clique_size),
    ('Number of Communities', pre_num_communities, post_num_communities),
    ('Size of Largest Community', pre_largest_community, post_largest_community),
    ('Clustering Coefficient', pre_clustering_coefficient, post_clustering_coefficient),
    ('Assortativity', pre_assortativity, post_assortativity),
]

data = {
    'Property': [label for label, _, _ in metric_rows],
    'Pre_election': [before for _, before, _ in metric_rows],
    'Post_election': [after for _, _, after in metric_rows],
}

df = pd.DataFrame(data)

# Percentage change of every metric, computed for the whole column at once.
df['Percentage Change'] = calculate_percentage_change(df['Pre_election'], df['Post_election'])

# Display the DataFrame
print(df)

In [None]:
# Undirected Source-Target graphs for the two windows around January 6.
Graph_pre_Jan6 = nx.from_pandas_edgelist(pre_Jan6, source='Source', target='Target', create_using=nx.Graph)
Graph_post_Jan6 = nx.from_pandas_edgelist(post_Jan6, source='Source', target='Target', create_using=nx.Graph)

In [None]:
# --- Network metrics for Graph_pre_Jan6 (these names are reused from the election section) ---
pre_density = nx.density(Graph_pre_Jan6)
pre_cliques = [clique for clique in nx.find_cliques(Graph_pre_Jan6)]
pre_max_clique_size = max(map(len, pre_cliques))
# "Communities" in this notebook are connected components.
pre_num_communities = nx.number_connected_components(Graph_pre_Jan6)
pre_largest_community = max(map(len, nx.connected_components(Graph_pre_Jan6)))
pre_clustering_coefficient = nx.average_clustering(Graph_pre_Jan6)
pre_assortativity = nx.degree_assortativity_coefficient(Graph_pre_Jan6)

# --- Same metrics for Graph_post_Jan6 ---
post_density = nx.density(Graph_post_Jan6)
post_cliques = [clique for clique in nx.find_cliques(Graph_post_Jan6)]
post_max_clique_size = max(map(len, post_cliques))
post_num_communities = nx.number_connected_components(Graph_post_Jan6)
post_largest_community = max(map(len, nx.connected_components(Graph_post_Jan6)))
post_clustering_coefficient = nx.average_clustering(Graph_post_Jan6)
post_assortativity = nx.degree_assortativity_coefficient(Graph_post_Jan6)


In [None]:
# One (label, pre value, post value) triple per network metric.
metric_rows = [
    ('Density', pre_density, post_density),
    ('Number of Cliques', len(pre_cliques), len(post_cliques)),
    ('Max Clique Size', pre_max_clique_size, post_max_clique_size),
    ('Number of Communities', pre_num_communities, post_num_communities),
    ('Size of Largest Community', pre_largest_community, post_largest_community),
    ('Clustering Coefficient', pre_clustering_coefficient, post_clustering_coefficient),
    ('Assortativity', pre_assortativity, post_assortativity),
]

data = {
    'Property': [label for label, _, _ in metric_rows],
    'Pre_Jan6': [before for _, before, _ in metric_rows],
    'Post_Jan6': [after for _, _, after in metric_rows],
}

df = pd.DataFrame(data)

# Percentage change of every metric, computed for the whole column at once.
df['Percentage Change'] = calculate_percentage_change(df['Pre_Jan6'], df['Post_Jan6'])

# Display the DataFrame
print(df)


## Hate Type

In [None]:
# Tables with one '*_prediction' column per hate type (columns used below: 'Day', 'SNS Source').
df_Nov7 = pd.read_csv(filepath_or_buffer='./Data/df_Nov7_hate_type.csv')
df_jan6 = pd.read_csv(filepath_or_buffer='./Data/df_jan6_hate_type.csv')

In [None]:
# Five-day windows on each side of the event; both bounds are inclusive.
Pre_jan6 = df_jan6[df_jan6['Day'].between('2021-01-01', '2021-01-05')]
Post_jan6 = df_jan6[df_jan6['Day'].between('2021-01-06', '2021-01-10')]
Pre_Nov7 = df_Nov7[df_Nov7['Day'].between('2020-11-02', '2020-11-06')]
Post_Nov7 = df_Nov7[df_Nov7['Day'].between('2020-11-07', '2020-11-11')]

In [None]:
# Hate-type prediction columns (presumably boolean / 0-1 flags — confirm) plus the
# platform column, which is only used as the grouping key.
boolean_columns = [
    'religion_prediction',
    'race_prediction',
    'gender_prediction',
    'giso_prediction',
    'immigration_prediction',
    'ein_prediction',
    'antisemitism_prediction',
    'SNS Source',
]

# Per-platform totals of every prediction column, before and after January 6.
hate_counts_pre = Pre_jan6.loc[:, boolean_columns].groupby('SNS Source').sum()
hate_counts_post = Post_jan6.loc[:, boolean_columns].groupby('SNS Source').sum()

# Show the post-window totals per platform
hate_counts_post

In [None]:
# Per-platform percentage change of each hate type between the pre and post windows.
# Align both tables on the union of platforms first: a platform that appears in only
# one window would otherwise yield a row of NaN instead of being counted as zero.
platforms = hate_counts_pre.index.union(hate_counts_post.index)
pre_aligned = hate_counts_pre.reindex(platforms, fill_value=0)
post_aligned = hate_counts_post.reindex(platforms, fill_value=0)

# The +1 in the denominator keeps the ratio finite when a pre-window count is zero
# (it also slightly shrinks every percentage).
percentage_change = ((post_aligned - pre_aligned) / (pre_aligned + 1)) * 100
percentage_change

In [None]:
# Collapse the per-platform tables into one total per hate type.
total_pre = hate_counts_pre.sum(axis=0)
total_post = hate_counts_post.sum(axis=0)

# Percentage change per hate type, ignoring which platform the content came from.
percentage_change_total = total_post.sub(total_pre).div(total_pre).mul(100)

# Display the percentage change
print(percentage_change_total)

In [None]:
# Turn column names such as 'race_prediction' into tick labels such as 'race Hate'.
x_labels = percentage_change_total.index.str.replace('_prediction', ' Hate', regex=False).tolist()

# Bar chart of the overall percentage change per hate type.
plt.figure(figsize=(10, 6))
percentage_change_total.plot.bar(color='skyblue')
plt.title('Change in Hate Type after January 6 Capitol attack')
plt.ylabel('Percentage Increase (%)')
plt.xticks(ticks=range(len(x_labels)), labels=x_labels, rotation=0, ha='center')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

In [None]:
# Hate-type prediction columns (presumably boolean / 0-1 flags — confirm) plus the
# platform column, which is only used as the grouping key.
boolean_columns = [
    'religion_prediction',
    'race_prediction',
    'gender_prediction',
    'giso_prediction',
    'immigration_prediction',
    'ein_prediction',
    'antisemitism_prediction',
    'SNS Source',
]

# Per-platform totals of every prediction column, before and from November 7 onward.
hate_counts_pre = Pre_Nov7.loc[:, boolean_columns].groupby('SNS Source').sum()
hate_counts_post = Post_Nov7.loc[:, boolean_columns].groupby('SNS Source').sum()


In [None]:
# Per-platform percentage change of each hate type between the pre and post windows.
# Align both tables on the union of platforms first: a platform that appears in only
# one window would otherwise yield a row of NaN instead of being counted as zero.
platforms = hate_counts_pre.index.union(hate_counts_post.index)
pre_aligned = hate_counts_pre.reindex(platforms, fill_value=0)
post_aligned = hate_counts_post.reindex(platforms, fill_value=0)

# The +1 in the denominator keeps the ratio finite when a pre-window count is zero
# (it also slightly shrinks every percentage).
percentage_change = ((post_aligned - pre_aligned) / (pre_aligned + 1)) * 100
percentage_change

In [None]:
# Collapse the per-platform tables into one total per hate type.
total_pre = hate_counts_pre.sum(axis=0)
total_post = hate_counts_post.sum(axis=0)

# Percentage change per hate type, ignoring which platform the content came from.
percentage_change_total = total_post.sub(total_pre).div(total_pre).mul(100)

# Display the percentage change
print(percentage_change_total)

In [None]:
# Turn column names such as 'race_prediction' into tick labels such as 'race Hate'.
x_labels = [label.replace('_prediction', ' Hate') for label in percentage_change_total.index]

# Bar chart of the overall percentage change per hate type for the November 2020 windows.
plt.figure(figsize=(10, 6))
percentage_change_total.plot(kind='bar', color='skyblue')
# The data plotted here are the Nov 7 windows; the previous title was copied from
# the January 6 chart and mislabeled the figure.
plt.title('Change in Hate Type after November 7, 2020 (President Elect Declared)')
plt.ylabel('Percentage Increase (%)')
plt.xticks(range(len(x_labels)), x_labels, rotation=0, ha='center')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

## Correlation 


In [None]:
# Reload the link table and read the per-day hate-type counts, indexed by 'Day'.
Hate_Universe = pd.read_csv(filepath_or_buffer='./Data/Hate_Universe.csv')
Daily_count_flavors_df = pd.read_csv(filepath_or_buffer='./Data/Daily_count_flavors_df.csv', index_col='Day')

In [None]:

# Platforms in order of first appearance in the link table.
SNS_columns = Hate_Universe['SNS Source'].unique().tolist()
print(SNS_columns)

# Daily number of links per source platform: one row per day, one column per platform.
# Built in a single groupby so the index is the union of all days. Filling an empty
# DataFrame column by column pins the index to the first platform's days and silently
# drops any day on which that platform has no rows.
# Platform/day combinations with no rows stay NaN, as in the column-wise version.
Daily_count_sns_df = (
    Hate_Universe
    .groupby(['Day', 'SNS Source'])
    .size()
    .unstack('SNS Source')
    .reindex(columns=SNS_columns)   # keep the first-appearance column order
    .rename_axis(columns=None)
)
    



In [None]:

# Correlation between every hate-type daily series and every platform daily series,
# keyed by (hate type column, platform column).
correlations = {
    (flavor_col, sns_col): Daily_count_flavors_df[flavor_col].corr(Daily_count_sns_df[sns_col])
    for flavor_col in Daily_count_flavors_df.columns
    for sns_col in Daily_count_sns_df.columns
}

for (flavor_col, sns_col), value in correlations.items():
    print(f"Correlation between {flavor_col} and {sns_col}: {value}")

In [None]:
# Split the (hate type, platform) keys into two parallel tuples.
columns1, columns2 = zip(*correlations)

# Correlation values, in the same order as the keys
correlation_values = [*correlations.values()]

# Long-form table: one row per (hate type, platform) pair
correlation_df = pd.DataFrame({
    'Column1': columns1,
    'Column2': columns2,
    'Correlation': correlation_values,
})

# Wide matrix (hate types as rows, platforms as columns) with the 'Instagram'
# platform column removed; rows are hate types, so no row is dropped.
correlation_matrix = (
    correlation_df
    .pivot(index='Column1', columns='Column2', values='Correlation')
    .drop(columns='Instagram')
)

# Plot the heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap between daily edge and hate type count across different SNS')
plt.xlabel('Social Networking Site')
plt.ylabel('Hate Type Detected')
plt.show()

## Telegram

In [None]:
# Platform-level link table (columns used below: 'Day', 'SNS Source', 'SNS Target').
SNS_Hate_Core = pd.read_csv(filepath_or_buffer="./Data/SNS_Hate_Core.csv")

In [None]:

# Pre-election window: Nov 1 through Nov 3, both inclusive.
pre_election_sns = SNS_Hate_Core[SNS_Hate_Core['Day'].between('2020-11-01', '2020-11-03')]

# Post-election window: after Nov 3, up to and including Nov 7.
# NOTE(review): this is 4 days against 3 days for the pre window — confirm the
# asymmetry is intended before comparing raw degree counts between the two.
post_election_sns = SNS_Hate_Core[SNS_Hate_Core['Day'].gt('2020-11-03') & SNS_Hate_Core['Day'].le('2020-11-07')]


In [None]:
# Directed multigraphs between platforms: every row becomes its own edge, so
# repeated platform pairs add to the degree counts.
Graph_pre_election = nx.from_pandas_edgelist(pre_election_sns, source='SNS Source', target='SNS Target', create_using=nx.MultiDiGraph)
Graph_post_election = nx.from_pandas_edgelist(post_election_sns, source='SNS Source', target='SNS Target', create_using=nx.MultiDiGraph)

In [None]:

# One in-degree vs. out-degree scatter plot per period, with every node labelled.
# Each graph is paired with a readable label: interpolating the graph object itself
# into the title only shows its string representation, not which period is plotted.
labelled_graphs = [
    ('pre-election', Graph_pre_election),
    ('post-election', Graph_post_election),
]

for period_label, graph in labelled_graphs:
    indegree_dict = dict(graph.in_degree())
    outdegree_dict = dict(graph.out_degree())

    # Look degrees up by node so names and coordinates cannot get out of step.
    node_names = list(graph.nodes())
    indegree_values = [indegree_dict[name] for name in node_names]
    outdegree_values = [outdegree_dict[name] for name in node_names]

    plt.figure(figsize=(8, 6))
    plt.scatter(indegree_values, outdegree_values, color='blue', alpha=0.5)
    plt.xlabel('Indegree')
    plt.ylabel('Outdegree')
    plt.title(f'Scatterplot of Indegree vs Outdegree for {period_label} hate core')
    plt.grid(True)

    for name, x, y in zip(node_names, indegree_values, outdegree_values):
        plt.annotate(name, (x, y))

    # Diagonal y = x spanning the full degree range: nodes above it send more
    # links than they receive, nodes below it receive more than they send.
    all_degrees = indegree_values + outdegree_values
    min_value, max_value = min(all_degrees), max(all_degrees)
    plt.plot([min_value, max_value], [min_value, max_value], color='red', linestyle='--')

    plt.tight_layout()
    plt.show()

In [None]:
# Total degree (in + out) of the 'TG' node in each window
# ('TG' is presumably Telegram, per the section heading — confirm).
degree_pre_election = Graph_pre_election.degree('TG')
degree_post_election = Graph_post_election.degree('TG')

# Growth of that degree, as a percentage of the pre-election value
degree_delta = degree_post_election - degree_pre_election
increase_in_degree = (degree_delta / degree_pre_election) * 100

print("Increase in degree of node 'TG':", increase_in_degree)

In [None]:
# Total number of in-links / out-links over all nodes, per window
total_in_links_pre_election = sum(degree for _, degree in Graph_pre_election.in_degree())
total_out_links_pre_election = sum(degree for _, degree in Graph_pre_election.out_degree())
total_in_links_post_election = sum(degree for _, degree in Graph_post_election.in_degree())
total_out_links_post_election = sum(degree for _, degree in Graph_post_election.out_degree())

# In- and out-degree of the 'TG' node, per window
in_degree_pre_election_TG = Graph_pre_election.in_degree('TG')
out_degree_pre_election_TG = Graph_pre_election.out_degree('TG')
in_degree_post_election_TG = Graph_post_election.in_degree('TG')
out_degree_post_election_TG = Graph_post_election.out_degree('TG')

# Share of all in-links / out-links that involve 'TG', in percent
percent_in_links_pre_election = 100 * (in_degree_pre_election_TG / total_in_links_pre_election)
percent_out_links_pre_election = 100 * (out_degree_pre_election_TG / total_out_links_pre_election)
percent_in_links_post_election = 100 * (in_degree_post_election_TG / total_in_links_post_election)
percent_out_links_post_election = 100 * (out_degree_post_election_TG / total_out_links_post_election)

print("Percentage of in-links of node 'TG' in Graph_pre_election:", percent_in_links_pre_election)
print("Percentage of out-links of node 'TG' in Graph_pre_election:", percent_out_links_pre_election)

print("Percentage of in-links of node 'TG' in Graph_post_election:", percent_in_links_post_election)
print("Percentage of out-links of node 'TG' in Graph_post_election:", percent_out_links_post_election)