In [None]:
import pickle
import pandas as pd

In [None]:
# Read the pickled object (used below as a DataFrame) from disk.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
with open("understanding_results.pkl", mode="rb") as results_file:
    df = pickle.load(results_file)

In [None]:
# Preview the first rows of the loaded data (head() defaults to five rows).
df.head()

In [None]:
# List the dtype of every column.
df.dtypes

In [None]:
from causalnlp import CausalInferenceModel
from lightgbm import LGBMClassifier

In [None]:
# Build a T-learner (LightGBM base learner) with 'resolution_numeric' as the
# treatment and 'rating_numeric' as the outcome, using the comment text plus the
# listed columns as inputs, then fit it.
# NOTE(review): 'understanding_numeric' is only NaN-filled in a later cell, so this
# model is fitted before that fill is applied — confirm that is intended.
cm = CausalInferenceModel(
    df,
    metalearner_type='t-learner',
    learner=LGBMClassifier(num_leaves=10),
    treatment_col='resolution_numeric',
    outcome_col='rating_numeric',
    text_col='comment_history_table_string',
    include_cols=[
        'aws_platform_numeric',
        'custom_product',
        'sentiment',
        'support_case_numeric',
        'understanding_numeric',
    ],
)
cm.fit()

In [None]:
# Print the fitted model's overall average treatment effect (ATE) estimate.
print(cm.estimate_ate())

In [None]:
# ATE restricted to rows whose comment text matches the pattern 'good'.
# na=False makes rows with missing text count as "no match"; without it the mask
# would contain NaN for those rows, and pandas refuses to use a mask with NaN for
# boolean indexing.
print(cm.estimate_ate(df['comment_history_table_string'].str.contains('good', na=False)))

In [None]:
# Print the first ten entries of element [1] of the interpretation output, with plotting disabled.
# NOTE(review): presumably [1] selects the treated group's feature importances — confirm against the CausalNLP docs.
print( cm.interpret(plot=False)[1][:10] )

In [None]:
# Replace missing 'understanding_numeric' values with 0.0.
# NOTE(review): this cell appears after the cell that fits `cm`, which lists
# 'understanding_numeric' among its include_cols — confirm whether the fill should
# happen before that fit.
df['understanding_numeric'] = df['understanding_numeric'].fillna(0.0)

In [None]:
def calculate_edge_properties(df, treatment, outcome, covariates,
                              text_col='comment_history_table_string'):
    """Fit a T-learner for one treatment/outcome pair and return its ATE estimate.

    Only the result of ``estimate_ate()`` is returned; no confidence intervals
    are computed here.

    Args:
        df: DataFrame holding the treatment, outcome, text and covariate columns.
        treatment: Name of the treatment column.
        outcome: Name of the outcome column.
        covariates: Column names passed to the model as ``include_cols``. The
            treatment and outcome columns are removed from this list if present,
            so they can never be fed back to the model as ordinary features.
        text_col: Name of the free-text column. Defaults to the column used
            throughout this notebook.

    Returns:
        Whatever ``CausalInferenceModel.estimate_ate()`` returns for the fitted model.
    """
    if covariates is not None:
        # Guard against leaking the treatment or the outcome into the feature set.
        covariates = [col for col in covariates if col not in (treatment, outcome)]

    # A fresh learner is created per call so models for different edges share no state.
    model = CausalInferenceModel(
        df,
        metalearner_type='t-learner',
        learner=LGBMClassifier(num_leaves=10),
        treatment_col=treatment,
        outcome_col=outcome,
        text_col=text_col,
        include_cols=covariates,
    )
    model.fit()
    return model.estimate_ate()

# Outcome column shared by every edge, and the full pool of covariate columns.
outcome = 'rating_numeric'
covariates = [
    'aws_platform_numeric',
    'custom_product',
    'sentiment',
    'support_case_numeric',
    'resolution_numeric',
    'understanding_numeric',
]
# Columns to try as the treatment, one fitted model per column.
potential_treatments = [
    'support_case_numeric',
    'aws_platform_numeric',
    'sentiment',
    'resolution_numeric',
    'understanding_numeric',
]

# Collect one ATE estimate per candidate treatment; the treatment column itself
# is left out of the covariates handed to that model.
edge_properties = {}
for treatment in potential_treatments:
    edge_properties[treatment] = calculate_edge_properties(
        df,
        treatment,
        outcome,
        [name for name in covariates if name != treatment],
    )

In [None]:
# Show the ATE estimate collected for each candidate treatment.
print(edge_properties)

In [None]:
import pickle


# Save the current DataFrame to final_df.pkl.
with open("final_df.pkl", mode="wb") as final_file:
    pickle.dump(df, final_file)

In [None]:
def calculate_individual_edge_properties(row):
    """Return the first value ``cm.predict`` yields for a single DataFrame row.

    The row (a Series, as supplied by ``DataFrame.apply(axis=1)``) is reshaped
    into a one-row frame; ``reset_index()`` moves the row's label into a regular
    column. That frame is then scored with the notebook-level model ``cm``.
    """
    single_row_frame = row.to_frame().T.reset_index()
    prediction = cm.predict(single_row_frame)
    return prediction[0][0]

# Score every row with calculate_individual_edge_properties and store the result.
# NOTE(review): each row triggers its own cm.predict call, which may be slow on
# large frames. Re-running this cell also passes the already-added
# 'resolution_effect' column along in the rows given to cm.predict — confirm
# that is harmless.
df['resolution_effect'] = df.apply(calculate_individual_edge_properties, axis=1)

In [None]:
# Preview the first rows again, now including the 'resolution_effect' column.
df.head()

In [None]:
import pickle


# Save the DataFrame to causal_analysis.pkl.
with open("causal_analysis.pkl", mode="wb") as analysis_file:
    pickle.dump(df, analysis_file)

In [None]:
import sys

def get_deep_size(obj, seen=None):
    """
    Find the total size of an object and all its contents.

    The traversal uses an explicit stack rather than recursion, so arbitrarily
    deep nesting cannot raise ``RecursionError``.

    Args:
        obj: The object to measure.
        seen: Optional set of ``id()`` values that have already been counted.
            It is updated in place, so a caller can share one set across
            several calls to avoid double counting.

    Returns:
        The sum of ``sys.getsizeof`` over ``obj`` and everything reachable from
        it through dict keys/values, list/tuple/set/frozenset items and instance
        ``__dict__`` attributes. Each distinct object is counted once; an object
        whose id is already in ``seen`` contributes 0.
    """
    if seen is None:
        seen = set()

    total = 0
    pending = [obj]
    while pending:
        current = pending.pop()

        # Count every distinct object once, even if it is referenced repeatedly.
        current_id = id(current)
        if current_id in seen:
            continue
        seen.add(current_id)

        total += sys.getsizeof(current)

        # Queue the contents of the container types that are followed.
        if isinstance(current, dict):
            for key, value in current.items():
                pending.append(key)
                pending.append(value)
        elif isinstance(current, (list, tuple, set, frozenset)):
            pending.extend(current)

        # Objects carrying attributes also own their attribute dictionary.
        if hasattr(current, '__dict__'):
            pending.append(current.__dict__)

    return total

def get_size_in_kb(obj):
    """
    Report an object's deep size in kilobytes (deep size in bytes / 1024).
    """
    return get_deep_size(obj) / 1024

In [None]:
# Convert the DataFrame to a nested dict (to_dict() defaults to {column: {index: value}}).
dic = df.to_dict()

In [None]:
# Report the deep size of the dictionary in KB, formatted to two decimals.
print(f"Dictionary size: {get_size_in_kb(dic):.2f} KB")

In [None]:
# Write the first 50 rows to a CSV sample file (to_csv also writes the index by default).
df[:50].to_csv("./sample_data.csv")