In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# !pip install OpenAI
import json
import pandas as pd
import numpy as np
from ParserDB import ParserDB

In [3]:
def stances_by_thread(data):
    user_stances = {} # {user: {supports: 3, ...}}
    
    with open(data, "r") as f:
        for l in f.readlines():
            response_text = json.loads(json.loads(l)["response"]["body"]["output"][0]["content"][0]["text"])
            for thread in response_text["threads"]:
                for user in thread["users"]:
                    username = user["user"]
                    userstance = user["stance"]
                    cur_user_stance = user_stances.get(username, {})
                    cur_user_stance[userstance] = cur_user_stance.get(userstance, 0) + 1
                    user_stances[username] = cur_user_stance
    return user_stances

def define_user_stance(stance_counts):
    if stance_counts.get("поддерживает", 0) == stance_counts.get("не поддерживает", 0):
        return "невозможно определить"
    return "поддерживает" if stance_counts.get("поддерживает", 0) > stance_counts.get("не поддерживает", 0) else "не поддерживает"

def make_graph(threads_by_videos, user_stances, folder_to_save):
    nodes = set()
    edges = set()
    
    reply_count = 0
    problem_count = 0
    for videoId, videoThreads, query in threads_by_videos:
        videoThreads = json.loads(videoThreads)
        users = set()
        thread_authors = dict() # {topLevelComment: author}
        nodes.add((videoId, "video", query))
        for thread in videoThreads:
            thread = json.loads(thread)
            for comment in thread:
                users.add(comment["authorDisplayName"])
                nodes.add((comment["authorDisplayName"], "user", "USER"))
                if comment["topLevelComment"] == comment["commentId"]:
                    thread_authors[comment["topLevelComment"]] = comment["authorDisplayName"]
                
        for thread in videoThreads:
            thread = json.loads(thread)
            thread_author = thread_authors[thread[0]["topLevelComment"]]
            for comment in thread:
                if comment["authorDisplayName"] == thread_author:
                    edges.add((comment["authorDisplayName"], videoId))
                elif "@" not in comment["text"]:
                    edges.add((comment["authorDisplayName"], thread_author))
                else:
                    reply_to = None
                    for user in users:
                        if user in comment["text"]:
                            reply_to = user
                            break
                    if reply_to is not None:
                        edges.add((comment["authorDisplayName"], reply_to))
                    else:
                        problem_count += 1

    nodes = [dict(id=n[0], label=n[0], nodeType=n[1], query=n[2], stance=define_user_stance(user_stances.get(n[0], {}))) for n in nodes]
    edges = [dict(source=e[0], target=e[1]) for e in edges]
    
    nodes_df = pd.DataFrame(nodes)
    nodes_df["size"] = np.where(nodes_df["nodeType"] == "video", 20, 1)
    nodes_df.to_csv(f"{folder_to_save}/nodes.csv", index=False, encoding="utf-8")
    pd.DataFrame(edges).to_csv(f"{folder_to_save}/edges.csv", index=False, encoding="utf-8")

In [4]:
p = ParserDB(api_key_file="./data/data_json/api_token.txt", database="./data/data.db")
chemtrails_threads = p.get_threads("химтрейлы")
chemtrails_stances = stances_by_thread("./data/openai_responses/batch_chemtrails_output.jsonl")

In [5]:
make_graph(chemtrails_threads, chemtrails_stances, "data/gephi_chem_stances")

In [62]:
# EI-index: (E - I) / (E + I)
# E - external connections (responses to opponents); I - internal connections (responses to group-members)
def calculate_ei(nodes, edges):
    merged = pd.merge(
        chemtrails_edges,
        chemtrails_nodes.rename(columns={"label": "source", "stance": "source_stance"}),
        on="source"
    )[["target", "source", "source_stance"]]
    merged = pd.merge(
        merged,
        chemtrails_nodes.rename(columns={"label": "target", "stance": "target_stance"}),
        on="target"
    )[["target", "source", "source_stance", "target_stance"]]
    merged = merged[(merged["source_stance"] != "невозможно определить") & (merged["target_stance"] != "невозможно определить")]
    
    reply_counts = merged.groupby(by=["source_stance", "target_stance"]).size()
    
    I_support = reply_counts.loc["поддерживает"]["поддерживает"]
    I_oppose = reply_counts.loc["не поддерживает"]["не поддерживает"]
    E_support = reply_counts.loc["поддерживает"]["не поддерживает"]
    E_oppose = reply_counts.loc["не поддерживает"]["поддерживает"]

    # print(I_support, I_oppose, E_support, E_oppose)
    
    EI_support = (E_support - I_support) / (E_support + I_support)
    EI_oppose = (E_oppose - I_oppose) / (E_oppose + I_oppose)
    return EI_support, EI_oppose

In [4]:
chemtrails_nodes = pd.read_csv("./data/gephi_chem_stances/nodes.csv")
chemtrails_edges = pd.read_csv("./data/gephi_chem_stances/edges.csv")

In [63]:
calculate_ei(chemtrails_nodes, chemtrails_edges)

(-0.5978526110297706, 0.40804597701149425)

In [45]:
chemtrails_nodes

Unnamed: 0,id,label,nodeType,query,stance,size
0,@MariyaMash,@MariyaMash,user,USER,поддерживает,1
1,@СергейМалинкин-щ2п,@СергейМалинкин-щ2п,user,USER,невозможно определить,1
2,@DmitryTomson,@DmitryTomson,user,USER,не поддерживает,1
3,@ТоняЕргешова,@ТоняЕргешова,user,USER,поддерживает,1
4,@СветланаСтолярова-я1ш,@СветланаСтолярова-я1ш,user,USER,не поддерживает,1
...,...,...,...,...,...,...
15907,@время_пришло,@время_пришло,user,USER,поддерживает,1
15908,@szsx9560,@szsx9560,user,USER,невозможно определить,1
15909,@Красноармеец-д6х,@Красноармеец-д6х,user,USER,невозможно определить,1
15910,@searcher4187,@searcher4187,user,USER,поддерживает,1


In [15]:
chemtrails_edges

Unnamed: 0,source,target
0,@TatynaMahlaeva.,@Rada.887
1,@ВикторА-х2щ,@Aaaassss-t2e
2,@sergeishumchenko5895,@GsekvxaKhdklg
3,@schism.,hNwNYva9YP4
4,@Sun111moon,R6YG4Y1-p14
...,...,...
27279,@ТимурМорозов-б1м,@ElenaSanktPeterbyrg
27280,@Ebonitoviy_Voin,hNwNYva9YP4
27281,@Litis465,@Берегиня-ф3у
27282,@ndzeyapolesije3762,yWbED2X1m-o
