In [3]:
import os
import numpy as np
import pandas as pd
import pickle

def compute_TBI_from_tdl(event_folder):
    """
    Compute the Temporal Burst Influence (TBI) for each user using the TDL edges.
    The TDL layer contains an 'edges.txt' file with lines formatted as:
    author1;author2;weight
    Here, we assume that the weight on each edge reflects the burst value of interactions.
    
    For each user, we collect all weights from edges in which the user appears (either as source or target).
    Then, for each user u_i, we compute:
      mu = mean(weight list)
      sigma = standard deviation(weight list)
    The revised TBI is defined as:
      TBI(u_i) = (sigma - mu) / (sigma + mu)
    If sigma + mu equals zero, TBI is defined as 0.
    
    The resulting dictionary mapping user IDs to their TBI value is saved as "TBI_values.pkl"
    in the given event folder.
    """
    # Path to the TDL edges file
    tdl_file = os.path.join(event_folder, "network", "tdl", "edges.txt")
    if not os.path.exists(tdl_file):
        raise FileNotFoundError(f"TDL edges file not found: {tdl_file}")
    
    # Read the TDL edges file; expect format: author1;author2;weight
    df = pd.read_csv(tdl_file, sep=";", header=None, names=["u", "v", "weight"], engine='python')
    
    # Create a dictionary mapping each user to a list of weights from incident edges.
    user_weights = {}
    for idx, row in df.iterrows():
        u, v, w = row["u"], row["v"], row["weight"]
        # Add weight for user u
        if u not in user_weights:
            user_weights[u] = []
        user_weights[u].append(w)
        # Add weight for user v
        if v not in user_weights:
            user_weights[v] = []
        user_weights[v].append(w)
    
    # Compute TBI for each user using the revised formula.
    tbi_dict = {}
    for user, weights in user_weights.items():
        weights = np.array(weights)
        mu = np.mean(weights)
        sigma = np.std(weights)
        denom = sigma + mu
        if denom > 0:
            tbi = (sigma - mu) / denom
        else:
            tbi = 0
        tbi_dict[user] = tbi
    
    # Save the TBI dictionary to a pickle file in the event folder.
    output_file = os.path.join(event_folder, "TBI_values.pkl")
    with open(output_file, "wb") as f:
        pickle.dump(tbi_dict, f)
    print(f"TBI values saved to: {output_file}")
    
    return tbi_dict

In [4]:
import os
import pickle

def compute_SSC_from_asl(event_folder):
    """
    Compute the revised Sentiment Synchronization Coefficient (SSC) for each user using the ASL edges,
    optimized to avoid loading the entire file into memory.
    
    The ASL layer contains an 'edges.txt' file with lines formatted as:
    author1;author2;weight
    where 'weight' represents the cosine similarity between the affective vectors of two users.
    
    For each user, we accumulate the squared weight of each incident edge and compute:
      SSC(u_i) = (1 / degree(u_i)) * sum_{u_j in N(u_i)} (weight)^2
    
    The resulting dictionary mapping each user to its SSC value is saved as 'SSC_values.pkl'.
    """
    asl_file = os.path.join(event_folder, "network", "asl", "edges.txt")
    if not os.path.exists(asl_file):
        raise FileNotFoundError(f"ASL edges file not found: {asl_file}")
    
    # Initialize an empty dictionary to accumulate squared weights.
    user_weights_sq = {}
    
    # Open and process the file line-by-line to minimize memory usage.
    with open(asl_file, "r", encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(";")
            if len(parts) != 3:
                continue
            u, v, w_str = parts
            try:
                w = float(w_str)
            except ValueError:
                continue
            w_sq = w ** 2  # square the cosine similarity to emphasize strong alignment
            
            # Update user u
            if u in user_weights_sq:
                user_weights_sq[u] += w_sq
                # Also count the occurrence for averaging
                user_weights_sq[u + "_count"] += 1
            else:
                user_weights_sq[u] = w_sq
                user_weights_sq[u + "_count"] = 1
                
            # Update user v
            if v in user_weights_sq:
                user_weights_sq[v] += w_sq
                user_weights_sq[v + "_count"] += 1
            else:
                user_weights_sq[v] = w_sq
                user_weights_sq[v + "_count"] = 1
    
    # Compute SSC for each user as the average of the squared weights.
    ssc_dict = {}
    # Iterate over keys and skip the '_count' ones.
    for key in list(user_weights_sq.keys()):
        if key.endswith("_count"):
            continue
        count_key = key + "_count"
        if count_key in user_weights_sq and user_weights_sq[count_key] > 0:
            ssc_dict[key] = user_weights_sq[key] / user_weights_sq[count_key]
        else:
            ssc_dict[key] = 0

    # Save SSC dictionary to a pickle file.
    output_file = os.path.join(event_folder, "SSC_values.pkl")
    with open(output_file, "wb") as f:
        pickle.dump(ssc_dict, f)
    print(f"SSC values saved to: {output_file}")
    
    return ssc_dict

# Example usage:
# event_folder = "data/2008_elections"
# ssc_values = compute_SSC_from_asl(event_folder)

In [7]:
import os
import re
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from collections import Counter
from nltk.tokenize import word_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.preprocessing import StandardScaler
from scipy.stats import pearsonr

# Make sure you have already downloaded nltk data:
# import nltk
# nltk.download('punkt')

def compute_negative_word_distribution(event_folder, authors_perc=0.1, top_k_words=20):
    """
    For the top authors by content volume, aggregate their content and compute the frequency distribution
    of negative words based on VADER's lexicon (words with sentiment scores < -0.8).
    
    Returns:
        neg_word_freq (list): Top negative words with their frequencies.
        radical_keywords (list): Top negative words (longer than 3 characters) to be used as radical keywords.
    """
    authors_file = os.path.join(event_folder, "network", "authors.txt")
    contents_file = os.path.join(event_folder, "cslasl-pre", "contents.txt")
    
    with open(authors_file, "r", encoding="utf-8") as f:
        authors_list = [line.strip() for line in f if line.strip()]
    with open(contents_file, "r", encoding="utf-8") as f:
        contents_list = [line for line in f if line]
        
    if len(authors_list) != len(contents_list):
        raise ValueError("Mismatch between number of authors and contents.")
    
    data_df = pd.DataFrame({"author": authors_list, "content": contents_list})
    aggregated_text = " ".join(data_df["content"].tolist())
    aggregated_text = aggregated_text.lower()
    aggregated_text = re.sub(r'[^a-z\s]', ' ', aggregated_text)
    tokens = word_tokenize(aggregated_text)
    
    sia = SentimentIntensityAnalyzer()
    vader_lexicon = sia.lexicon
    
    negative_tokens = [token for token in tokens if token in vader_lexicon and vader_lexicon[token] < -0.8]
    freq_counter = Counter(negative_tokens)
    neg_word_freq = freq_counter.most_common(top_k_words)
    radical_keywords = [word for word, freq in neg_word_freq if len(word) > 3]
    
    return neg_word_freq, radical_keywords

def analyze_tbi_ssc(tbi_dict, ssc_dict, event_folder, radical_keywords):
    """
    This function performs the following steps:
      1. Loads the authors and contents from the event folder.
      2. Computes a radicalization score for each author based on the frequency of radical keywords.
      3. Creates a binary radicalization label using the median radicalization score.
      4. Merges the TBI and SSC values (provided as dictionaries) with the radicalization data.
      5. Adds an interaction term (TBI * SSC) to capture the synergistic effect.
      6. Generates visualizations (histograms and boxplots) of TBI, SSC, and their interaction by radicalization status.
      7. Performs logistic regression with TBI, SSC, and the interaction term as predictors of radicalization.
      8. Computes Pearson correlation coefficients.
      
    The merged DataFrame and statistical results are saved for further analysis.
    """
    # Load authors and contents
    authors_file = os.path.join(event_folder, "network", "authors.txt")
    contents_file = os.path.join(event_folder, "cslasl-pre", "contents.txt")
    
    with open(authors_file, "r", encoding="utf-8") as f:
        authors = [line.strip() for line in f if line.strip()]
    with open(contents_file, "r", encoding="utf-8") as f:
        contents = [line for line in f if line]
    
    if len(authors) != len(contents):
        raise ValueError("Mismatch between number of authors and contents.")
    
    rad_df = pd.DataFrame({"author": authors, "content": contents})
    
    # Compute radicalization score based on frequency of radical keywords
    def compute_radical_score(text):
        text_lower = text.lower()
        score = 0
        for kw in radical_keywords:
            score += len(re.findall(r'\b' + re.escape(kw) + r'\b', text_lower))
        words = text_lower.split()
        return score / len(words) if words else 0.0
    
    rad_df["rad_score"] = rad_df["content"].apply(compute_radical_score)
    
    # Create binary radicalization label using the median of the radical score
    median_score = rad_df["rad_score"].median()
    rad_df["radical"] = (rad_df["rad_score"] > median_score).astype(int)
    
    # Merge TBI and SSC dictionaries with radicalization data
    tbi_df = pd.DataFrame(list(tbi_dict.items()), columns=["author", "TBI"])
    ssc_df = pd.DataFrame(list(ssc_dict.items()), columns=["author", "SSC"])
    merged_df = pd.merge(rad_df, tbi_df, on="author", how="inner")
    merged_df = pd.merge(merged_df, ssc_df, on="author", how="inner")
    
    # Ensure numeric types
    merged_df["TBI"] = pd.to_numeric(merged_df["TBI"], errors="coerce")
    merged_df["SSC"] = pd.to_numeric(merged_df["SSC"], errors="coerce")
    merged_df["radical"] = merged_df["radical"].astype(int)
    
    # Create interaction term between TBI and SSC
    merged_df["TBI_SSC"] = merged_df["TBI"] * merged_df["SSC"]
    
    # Visualization: Histograms and Boxplots for TBI, SSC, and Interaction Term
    plt.figure(figsize=(10,6))
    sns.histplot(data=merged_df, x="TBI", hue="radical", kde=True, bins=30)
    plt.title("Distribution of Temporal Burst Influence (TBI) by Radicalization Status")
    plt.xlabel("TBI")
    plt.ylabel("Frequency")
    plt.savefig(os.path.join(event_folder, "TBI_distribution_radical.png"))
    plt.close()
    
    plt.figure(figsize=(10,6))
    sns.boxplot(x="radical", y="TBI", data=merged_df)
    plt.title("TBI by Radicalization Status")
    plt.xlabel("Radicalization Status (0 = Non-Radicalized, 1 = Radicalized)")
    plt.ylabel("TBI")
    plt.savefig(os.path.join(event_folder, "TBI_boxplot_radical.png"))
    plt.close()
    
    plt.figure(figsize=(10,6))
    sns.histplot(data=merged_df, x="SSC", hue="radical", kde=True, bins=30)
    plt.title("Distribution of Sentiment Synchronization Coefficient (SSC) by Radicalization Status")
    plt.xlabel("SSC")
    plt.ylabel("Frequency")
    plt.savefig(os.path.join(event_folder, "SSC_distribution_radical.png"))
    plt.close()
    
    plt.figure(figsize=(10,6))
    sns.boxplot(x="radical", y="SSC", data=merged_df)
    plt.title("SSC by Radicalization Status")
    plt.xlabel("Radicalization Status (0 = Non-Radicalized, 1 = Radicalized)")
    plt.ylabel("SSC")
    plt.savefig(os.path.join(event_folder, "SSC_boxplot_radical.png"))
    plt.close()
    
    plt.figure(figsize=(10,6))
    sns.histplot(data=merged_df, x="TBI_SSC", hue="radical", kde=True, bins=30)
    plt.title("Distribution of TBI × SSC (Interaction) by Radicalization Status")
    plt.xlabel("TBI × SSC")
    plt.ylabel("Frequency")
    plt.savefig(os.path.join(event_folder, "Interaction_distribution_radical.png"))
    plt.close()
    
    plt.figure(figsize=(10,6))
    sns.boxplot(x="radical", y="TBI_SSC", data=merged_df)
    plt.title("Interaction (TBI × SSC) by Radicalization Status")
    plt.xlabel("Radicalization Status (0 = Non-Radicalized, 1 = Radicalized)")
    plt.ylabel("TBI × SSC")
    plt.savefig(os.path.join(event_folder, "Interaction_boxplot_radical.png"))
    plt.close()
    
    # Logistic Regression Analysis with TBI, SSC, and their Interaction as predictors.
    merged_df["intercept"] = 1.0
    predictors = ["intercept", "TBI", "SSC", "TBI_SSC"]
    X = merged_df[predictors]
    y = merged_df["radical"]
    
    logit_model = sm.Logit(y, X)
    try:
        logit_result = logit_model.fit(disp=False)
    except Exception as e:
        print("Logistic regression failed:", e)
        logit_result = None
    
    # Pearson Correlations.
    corr_tbi, pval_tbi = pearsonr(merged_df["TBI"], merged_df["radical"])
    corr_ssc, pval_ssc = pearsonr(merged_df["SSC"], merged_df["radical"])
    corr_inter, pval_inter = pearsonr(merged_df["TBI_SSC"], merged_df["radical"])
    
    # Save the merged DataFrame for further analysis.
    merged_df.to_csv(os.path.join(event_folder, "TBI_SSC_radicalization_analysis.csv"), index=False)
    
    print("Logistic Regression Summary (with TBI, SSC, and interaction):")
    if logit_result is not None:
        print(logit_result.summary())
    print(f"Pearson correlation between TBI and radicalization: {corr_tbi:.3f} (p={pval_tbi:.3e})")
    print(f"Pearson correlation between SSC and radicalization: {corr_ssc:.3f} (p={pval_ssc:.3e})")
    print(f"Pearson correlation between TBI×SSC and radicalization: {corr_inter:.3f} (p={pval_inter:.3e})")
    
    # Optionally, produce a contour plot of predicted radicalization probability
    # over a grid of TBI and SSC values.
    tbi_range = np.linspace(merged_df["TBI"].min(), merged_df["TBI"].max(), 50)
    ssc_range = np.linspace(merged_df["SSC"].min(), merged_df["SSC"].max(), 50)
    TBI_grid, SSC_grid = np.meshgrid(tbi_range, ssc_range)
    grid_df = pd.DataFrame({
        "TBI": TBI_grid.ravel(),
        "SSC": SSC_grid.ravel()
    })
    grid_df["TBI_SSC"] = grid_df["TBI"] * grid_df["SSC"]
    grid_df["intercept"] = 1.0
    X_grid = grid_df[["intercept", "TBI", "SSC", "TBI_SSC"]]
    
    if logit_result is not None:
        grid_df["predicted_prob"] = logit_result.predict(X_grid)
        grid_df = grid_df.reshape((50, 50, -1))  # Not strictly needed, we can reshape the predicted_prob.
        Z = logit_result.predict(X_grid).values.reshape(50, 50)
        plt.figure(figsize=(8,6))
        cp = plt.contourf(TBI_grid, SSC_grid, Z, levels=20, cmap='viridis')
        plt.colorbar(cp)
        plt.xlabel("TBI")
        plt.ylabel("SSC")
        plt.title("Predicted Radicalization Probability\n(TBI and SSC Interaction)")
        plt.savefig(os.path.join(event_folder, "predicted_probability_contour.png"))
        plt.close()
    
    return merged_df, logit_result, (corr_tbi, corr_ssc, corr_inter)

In [10]:
tbi_2008 = compute_TBI_from_tdl("data/2008_elections")
ssc_2008 = compute_SSC_from_asl("data/2008_elections")

TBI values saved to: data/2008_elections/TBI_values.pkl
SSC values saved to: data/2008_elections/SSC_values.pkl


In [18]:
# Compute negative word distribution to extract radical keywords.
neg_word_freq, radical_keywords = compute_negative_word_distribution("data/2008_elections", authors_perc=0.1, top_k_words=40)
print("Radical Keywords:", radical_keywords)

# Analyze the relationship between TBI, SSC, and radicalization.
merged_df, logit_result, correlations = analyze_tbi_ssc(tbi_2008, ssc_2008, "data/2008_elections", radical_keywords)

Radical Keywords: ['wrong', 'fuck', 'shit', 'stop', 'problem', 'crisis', 'stupid', 'hate', 'lies', 'anti', 'fraud', 'hell', 'poor', 'attack', 'racist', 'terrorist', 'lost', 'argument', 'lose', 'fear', 'crap', 'bullshit', 'worse', 'problems', 'crazy', 'damn', 'weapons', 'racism', 'doubt', 'illegal', 'attacks', 'fight', 'kill']
Logistic Regression Summary (TBI and SSC as predictors):
                           Logit Regression Results                           
Dep. Variable:                radical   No. Observations:                11817
Model:                          Logit   Df Residuals:                    11814
Method:                           MLE   Df Model:                            2
Date:                Fri, 28 Mar 2025   Pseudo R-squ.:                 0.09459
Time:                        10:53:28   Log-Likelihood:                -7367.2
converged:                       True   LL-Null:                       -8136.9
Covariance Type:            nonrobust   LLR p-value:          

In [19]:
tbi_2011 = compute_TBI_from_tdl("data/2011_wallstreet")
ssc_2011 = compute_SSC_from_asl("data/2011_wallstreet")

# Compute negative word distribution to extract radical keywords.
neg_word_freq, radical_keywords = compute_negative_word_distribution("data/2011_wallstreet", authors_perc=0.1, top_k_words=40)
print("Radical Keywords:", radical_keywords)

# Analyze the relationship between TBI, SSC, and radicalization.
merged_df, logit_result, correlations = analyze_tbi_ssc(tbi_2011, ssc_2011, "data/2011_wallstreet", radical_keywords)

TBI values saved to: data/2011_wallstreet/TBI_values.pkl
SSC values saved to: data/2011_wallstreet/SSC_values.pkl
Radical Keywords: ['protest', 'protesters', 'protests', 'problem', 'shit', 'wrong', 'fuck', 'stop', 'protesting', 'violence', 'poor', 'arrested', 'problems', 'anti', 'violent', 'debt', 'argument', 'stupid', 'fight', 'hate', 'hell', 'bullshit', 'arrest', 'blame', 'illegal', 'disagree', 'greed', 'fail', 'worse', 'lack', 'lose', 'lost', 'lower', 'risk', 'fighting']
Logistic Regression Summary (TBI and SSC as predictors):
                           Logit Regression Results                           
Dep. Variable:                radical   No. Observations:                 1396
Model:                          Logit   Df Residuals:                     1393
Method:                           MLE   Df Model:                            2
Date:                Fri, 28 Mar 2025   Pseudo R-squ.:                0.004911
Time:                        11:17:54   Log-Likelihood:              

In [6]:
tbi_2016 = compute_TBI_from_tdl("data/2016_elections")
ssc_2016 = compute_SSC_from_asl("data/2016_elections")

# Compute negative word distribution to extract radical keywords.
neg_word_freq, radical_keywords = compute_negative_word_distribution("data/2016_elections", authors_perc=0.1, top_k_words=40)
print("Radical Keywords:", radical_keywords)

# Analyze the relationship between TBI, SSC, and radicalization.
merged_df, logit_result, correlations = analyze_tbi_ssc(tbi_2016, ssc_2016, "data/2016_elections", radical_keywords)

TBI values saved to: data/2016_elections/TBI_values.pkl
SSC values saved to: data/2016_elections/SSC_values.pkl
Radical Keywords: ['fake', 'shit', 'fuck', 'wrong', 'stop', 'lost', 'racist', 'hate', 'problem', 'anti', 'rapist', 'stupid', 'illegal', 'bullshit', 'worse', 'hell', 'lose', 'argument', 'blame', 'crazy', 'damn', 'poor', 'attack', 'fraud', 'conspiracy', 'doubt', 'fight', 'rape', 'worst', 'rigged', 'lies', 'fucked', 'lying']


NameError: name 'analyze_tbi_ssc' is not defined

In [8]:
# Analyze the relationship between TBI, SSC, and radicalization.
merged_df, logit_result, correlations = analyze_tbi_ssc(tbi_2016, ssc_2016, "data/2016_elections", radical_keywords)



Logistic Regression Summary (with TBI, SSC, and interaction):
                           Logit Regression Results                           
Dep. Variable:                radical   No. Observations:               288877
Model:                          Logit   Df Residuals:                   288873
Method:                           MLE   Df Model:                            3
Date:                Fri, 28 Mar 2025   Pseudo R-squ.:                 -0.5938
Time:                        14:48:50   Log-Likelihood:            -3.1908e+05
converged:                      False   LL-Null:                   -2.0020e+05
Covariance Type:            nonrobust   LLR p-value:                     1.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept   1.936e+05   1979.889     97.805      0.000     1.9e+05    1.98e+05
TBI         2.006e+05   1981.048    101.280      0.000    1.97e+05   

AttributeError: 'DataFrame' object has no attribute 'reshape'

In [11]:
tbi_2017 = compute_TBI_from_tdl("data/2017_rally")
ssc_2017 = compute_SSC_from_asl("data/2017_rally")

# Compute negative word distribution to extract radical keywords.
neg_word_freq, radical_keywords = compute_negative_word_distribution("data/2017_rally", authors_perc=0.1, top_k_words=40)
print("Radical Keywords:", radical_keywords)

# Analyze the relationship between TBI, SSC, and radicalization.
merged_df, logit_result, correlations = analyze_tbi_ssc(tbi_2017, ssc_2017, "data/2017_rally", radical_keywords)

TBI values saved to: data/2017_rally/TBI_values.pkl
SSC values saved to: data/2017_rally/SSC_values.pkl
Radical Keywords: ['racist', 'shit', 'violence', 'hate', 'fuck', 'wrong', 'racism', 'anti', 'supremacists', 'stop', 'violent', 'problem', 'protest', 'stupid', 'attack', 'fake', 'protesters', 'racists', 'fight', 'bullshit', 'argument', 'fascist', 'killed', 'worse', 'hell', 'illegal', 'blame', 'evil', 'terrorist', 'lost', 'death', 'crime', 'kill', 'fire', 'terrorism', 'murder', 'fighting']




Logistic Regression Summary (with TBI, SSC, and interaction):
                           Logit Regression Results                           
Dep. Variable:                radical   No. Observations:               102925
Model:                          Logit   Df Residuals:                   102921
Method:                           MLE   Df Model:                            3
Date:                Fri, 28 Mar 2025   Pseudo R-squ.:                  -5.432
Time:                        15:11:44   Log-Likelihood:            -4.5886e+05
converged:                      False   LL-Null:                       -71342.
Covariance Type:            nonrobust   LLR p-value:                     1.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept   5.578e+05   1.92e+04     29.028      0.000     5.2e+05    5.96e+05
TBI         5.717e+05   1.92e+04     29.745      0.000    5.34e+05   

  return 1/(1+np.exp(-X))


AttributeError: 'DataFrame' object has no attribute 'reshape'

In [12]:
tbi_2021 = compute_TBI_from_tdl("data/2021_riot")
ssc_2021 = compute_SSC_from_asl("data/2021_riot")

# Compute negative word distribution to extract radical keywords.
neg_word_freq, radical_keywords = compute_negative_word_distribution("data/2021_riot", authors_perc=0.1, top_k_words=40)
print("Radical Keywords:", radical_keywords)

# Analyze the relationship between TBI, SSC, and radicalization.
merged_df, logit_result, correlations = analyze_tbi_ssc(tbi_2021, ssc_2021, "data/2021_riot", radical_keywords)

TBI values saved to: data/2021_riot/TBI_values.pkl
SSC values saved to: data/2021_riot/SSC_values.pkl
Radical Keywords: ['shit', 'fuck', 'stop', 'violence', 'riot', 'terrorists', 'wrong', 'attack', 'stupid', 'fight', 'lost', 'hate', 'conspiracy', 'fraud', 'terrorist', 'protests', 'death', 'violent', 'riots', 'hell', 'bullshit', 'problem', 'protest', 'arrested', 'killed', 'crazy', 'worse', 'fire', 'damn', 'prison', 'charges', 'died', 'lies', 'criminal', 'argument', 'crime']




Logistic Regression Summary (with TBI, SSC, and interaction):
                           Logit Regression Results                           
Dep. Variable:                radical   No. Observations:               253207
Model:                          Logit   Df Residuals:                   253203
Method:                           MLE   Df Model:                            3
Date:                Fri, 28 Mar 2025   Pseudo R-squ.:                  -2.529
Time:                        16:46:24   Log-Likelihood:            -6.1191e+05
converged:                      False   LL-Null:                   -1.7337e+05
Covariance Type:            nonrobust   LLR p-value:                     1.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept   3.746e+05   6605.347     56.712      0.000    3.62e+05    3.88e+05
TBI         3.846e+05   6606.368     58.212      0.000    3.72e+05   

AttributeError: 'DataFrame' object has no attribute 'reshape'