In [3]:
# Import packages
from tools.to_read import *
from tools.to_plot import *
from tools.to_do import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

# Set up
import os

# Project root directory. Set the SIZE_EFFECTS_ROOT environment variable to run
# the notebook on another machine; when it is unset (or empty) the original
# local path is used. Joining with '' keeps the trailing separator that the
# original value had, whatever form the override takes.
root = os.path.join(
    os.environ.get('SIZE_EFFECTS_ROOT') or '/home/jacoponudo/Documents/Size_effects/',
    '',
)


# Power Law distribution

#### Fit the power law

In [38]:
import pandas as pd
import powerlaw as pwl
import matplotlib.pyplot as plt
import os

# Platforms to analyze, one entry per dataset
platforms = [
    'gab',
    'reddit',
    'twitter',
    'usenet',
    'voat',
    'facebook',
]

# Load one platform's data, fit a power law to its thread sizes, and save the results
def analyze_platform_data(platform, root, columns_to_read, standard_columns):
    """Fit a power law to the number of unique users per post for one platform.

    Reads ``<root>/DATA/<platform>/<platform>_raw_data.parquet``, counts the
    distinct ``user_id`` values in each ``post_id``, writes those counts to
    ``<root>/PAPER/output/1_section/1_users_in_thread_<platform>.csv``, fits a
    power law to them with ``powerlaw.Fit``, prints the fit parameters and a
    power-law vs. exponential comparison, and saves a plot of the empirical
    and fitted PDFs to ``<root>/PAPER/output/plots/<platform>_power_law_fit.png``.
    Output directories are created if they do not exist.

    Parameters
    ----------
    platform : str
        Platform name; used in the input/output file names and as the key
        into ``columns_to_read``.
    root : str
        Project root directory (with or without a trailing separator).
    columns_to_read : mapping
        ``columns_to_read[platform]`` is passed as ``columns=`` to
        ``pandas.read_parquet``.
    standard_columns : sequence of str
        Column names assigned to the loaded frame, in the order of the
        columns read. Must include ``'post_id'`` and ``'user_id'``.

    Returns
    -------
    None

    Notes
    -----
    Best-effort: any exception raised while processing is caught and reported
    with ``print``, so that one failing platform does not stop a loop over
    several platforms.
    """
    try:
        # Load the data from a Parquet file. The path is built with
        # os.path.join (like the output paths) so `root` does not need to end
        # with a separator.
        input_path = os.path.join(root, 'DATA', platform, f'{platform}_raw_data.parquet')
        data = pd.read_parquet(input_path, columns=columns_to_read[platform])
        data.columns = standard_columns
        # The timestamp column is deliberately not parsed here: nothing below
        # uses it, and converting it is costly on large datasets.

        # Number of unique users per post
        unique_users_per_post = (
            data.groupby('post_id')['user_id']
            .nunique()
            .reset_index(name='unique_users_count')
        )

        # Save the per-post counts to CSV, creating the folder on first use
        csv_dir = os.path.join(root, 'PAPER', 'output', '1_section')
        os.makedirs(csv_dir, exist_ok=True)
        output_csv_path = os.path.join(csv_dir, f'1_users_in_thread_{platform}.csv')
        unique_users_per_post.to_csv(output_csv_path, index=False)

        # Fit the power-law distribution.
        # NOTE(review): the counts are integers but Fit is called with its
        # default (continuous) setting - consider discrete=True; confirm
        # against the powerlaw documentation before changing reported values.
        fit_function = pwl.Fit(unique_users_per_post['unique_users_count'].to_numpy())

        # Print the power-law parameters
        print(f"\nPlatform: {platform}")
        print(f"Alpha: {fit_function.power_law.alpha}")
        print(f"Sigma: {fit_function.power_law.sigma}")
        print(f"D: {fit_function.power_law.D}")

        # Compare the power law against an exponential distribution
        comparison = fit_function.distribution_compare('power_law', 'exponential')
        print(f"Comparison between Power Law and Exponential: {comparison}")

        # Plot the empirical PDF and the fitted power law on an explicit Axes
        # instead of relying on pyplot's implicit "current figure".
        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            fit_function.plot_pdf(ax=ax, color='b', linestyle='-', label='Empirical Data')
            fit_function.power_law.plot_pdf(ax=ax, color='r', linestyle='--', label='Fitted Power Law')
            ax.set_title(f"Power Law Fit for {platform}")
            ax.set_xlabel('Unique Users per Post')
            ax.set_ylabel('Probability')
            ax.legend()
            ax.grid(True)

            # Save the plot as PNG, creating the folder on first use
            plots_dir = os.path.join(root, 'PAPER', 'output', 'plots')
            os.makedirs(plots_dir, exist_ok=True)
            plot_path = os.path.join(plots_dir, f'{platform}_power_law_fit.png')
            fig.savefig(plot_path)
        finally:
            # Always release the figure, even if plotting or saving failed
            plt.close(fig)

    except Exception as e:
        print(f"Error processing platform {platform}: {e}")



# Run the analysis once for every platform
for platform in platforms:
    analyze_platform_data(
        platform=platform,
        root=root,
        columns_to_read=columns_to_read,
        standard_columns=standard_columns,
    )


Calculating best minimal value for power law fit
xmin progress: 99%
Platform: gab
Alpha: 3.5292471415993325
Sigma: 0.03858411923607005
D: 0.02201761290675619
Comparison between Power Law and Exponential: (233.1645088637785, 5.17112302132254e-09)
Calculating best minimal value for power law fit
xmin progress: 99%
Platform: reddit
Alpha: 1.988425228237548
Sigma: 0.006076205998567572
D: 0.02561273260329
Comparison between Power Law and Exponential: (9944.559997522074, 0.0)
Calculating best minimal value for power law fit
xmin progress: 99%
Platform: twitter
Alpha: 2.6312335950842725
Sigma: 0.03133513078484209
D: 0.023856980860095134
Comparison between Power Law and Exponential: (618.1714405113767, 8.789545257449831e-23)


Values less than or equal to 0 in data. Throwing out 0 or negative values


Calculating best minimal value for power law fit
xmin progress: 99%
Platform: usenet
Alpha: 4.099637022234884
Sigma: 0.0920053133382796
D: 0.015334687351538312
Comparison between Power Law and Exponential: (99.0338314118738, 1.2524065794596844e-07)
Calculating best minimal value for power law fit
xmin progress: 99%
Platform: voat
Alpha: 6.1853910803927645
Sigma: 0.30608395751402173
D: 0.028074366708991882
Comparison between Power Law and Exponential: (13.29339991551608, 0.20204328005220784)
Calculating best minimal value for power law fit
xmin progress: 99%
Platform: facebook
Alpha: 2.501686647475684
Sigma: 0.06295397916299462
D: 0.017124620461134832
Comparison between Power Law and Exponential: (203.93306221783251, 3.418352652744081e-05)
