<a href="https://colab.research.google.com/github/jhunwillparian0912-cell/Data-Analysis/blob/main/fullstat_script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Ask the user to upload the data file through Colab's upload helper.
# The value returned by files.upload() is kept in `uploaded` for later cells.
from google.colab import files
uploaded = files.upload()

In [None]:
!pip install scikit-posthocs
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import shapiro, levene, f_oneway, kruskal, spearmanr
import scikit_posthocs as sp

In [None]:
# Load the workbook chosen in the upload cell into `df`.
# A blank path ("") can never be opened, so the file name is taken from
# `uploaded`, which files.upload() keys by the uploaded file's name.
if not uploaded:
    raise ValueError("No file was uploaded - run the upload cell first.")

# If several files were uploaded, the first one is used.
data_filename = next(iter(uploaded))
df = pd.read_excel(data_filename)
df.head()

In [None]:
# Summary table: mean variable1 score for each Strand (Strand becomes a regular column)
variable1_table = (
    df.groupby('Strand')['variable1']
    .mean()
    .reset_index()
)

def interpret_variable1(score):
    """Translate a mean score into its descriptive level.

    The lower bounds are inclusive: 4.21 -> "Very High", 3.41 -> "High",
    2.61 -> "Moderate", 1.81 -> "Low". Any score that reaches none of these
    bounds is labelled "Very Low".
    """
    cutoffs = (
        (4.21, "Very High"),
        (3.41, "High"),
        (2.61, "Moderate"),
        (1.81, "Low"),
    )
    # Bounds are listed from highest to lowest, so the first match wins.
    for lower_bound, label in cutoffs:
        if score >= lower_bound:
            return label
    return "Very Low"

# Attach the descriptive level that corresponds to each strand's mean score
variable1_table['Interpretation'] = variable1_table['variable1'].map(interpret_variable1)

print("Mean Variable1 by Strand with Interpretation:")
print(variable1_table)

In [None]:
# Summary table: mean variable2 score for each Strand (Strand becomes a regular column)
variable2_summary = (
    df.groupby('Strand')['variable2']
    .mean()
    .reset_index()
)

def interpret_variable2(score):
    """Return the descriptive level for a mean variable2 score.

    Bands use inclusive lower bounds, checked from the top down:
    4.21 "Very High", 3.41 "High", 2.61 "Moderate", 1.81 "Low".
    A score below every bound falls back to "Very Low".
    """
    bands = [
        (4.21, "Very High"),
        (3.41, "High"),
        (2.61, "Moderate"),
        (1.81, "Low"),
    ]
    return next((level for floor, level in bands if score >= floor), "Very Low")

# Attach the descriptive level for each strand's mean variable2 score.
# The means live in the 'variable2' column of variable2_summary (the table is
# built from df['variable2']); there is no 'variable 1' column in it.
variable2_summary['Interpretation'] = variable2_summary['variable2'].apply(interpret_variable2)

print("Mean variable 2 by Strand with Interpretation:")
print(variable2_summary)

In [None]:
# Grouped bar chart: mean variable1 and variable2 per Strand, with the
# per-strand standard deviation drawn as error bars.
mean_std = df.groupby('Strand')[['variable1','variable2']].agg(['mean','std'])
strands = mean_std.index
variable1_mean = mean_std[('variable1', 'mean')]
variable1_std = mean_std[('variable1', 'std')]
variable2_mean = mean_std[('variable2', 'mean')]
variable2_std = mean_std[('variable2', 'std')]

# One slot per strand; the two bars sit half a bar-width either side of it.
x = np.arange(len(strands))
width = 0.35

fig, ax = plt.subplots(figsize=(12,6))
rects1 = ax.bar(x - width/2, variable1_mean, width, yerr=variable1_std, capsize=5, label='variable1')
rects2 = ax.bar(x + width/2, variable2_mean, width, yerr=variable2_std, capsize=5, label='variable2')

ax.set_ylabel('Mean Score')
ax.set_xlabel('Strand')
ax.set_title('variable1 and variable2 Level by Strand')
# Fixed y-range of 0-5; presumably the scores are on a 5-point scale - confirm.
ax.set_ylim(0, 5)
ax.set_xticks(x)
ax.set_xticklabels(strands, rotation=45)
ax.legend()

# Render explicitly, like the other plotting cells, so the cell output is the
# figure and not the repr of the Legend object.
plt.show()

In [None]:
# Levene's test for homogeneity of variance across strands.
# H0: every Strand has the same variance for the tested variable.
levene_results_list = []

for col in ['variable1', 'variable2']:
    # levene() compares two or more samples, so it needs one array per Strand
    # (passing the whole column as a single sample raises an error).
    # Missing values are dropped so they cannot turn the statistic into NaN.
    samples = [scores.dropna().values for _, scores in df.groupby('Strand')[col]]
    stat, p = levene(*samples)
    interpretation = "fail to reject H0 (equal variances)" if p > 0.05 else "reject H0 (variances not equal)"

    levene_results_list.append({
        'Variable': col,
        'Levene_statistic': stat,
        'p_value': p,
        'Interpretation': interpretation
    })

levene_results = pd.DataFrame(levene_results_list)
print("Levene's Test Results:")
print(levene_results)

In [None]:
# Box plot: spread of variable1 within each Strand
plt.figure(figsize=(12, 6))
sns.boxplot(data=df, x='Strand', y='variable1').set(
    title="Boxplot of variable1 per Strand",
    ylabel="variable1",
)
plt.show()

In [None]:
# Box plot: spread of variable2 within each Strand
plt.figure(figsize=(12, 6))
sns.boxplot(data=df, x='Strand', y='variable2').set(
    title="Boxplot of variable2 per Strand",
    ylabel="variable2",
)
plt.show()

In [None]:
# Shapiro-Wilk normality test for each score column.
# H0: the column's values are drawn from a normal distribution.
shapiro_results_list = []

# Test the same two columns that every other cell of this notebook analyses.
for col in ['variable1', 'variable2']:
    # Missing values are dropped so they cannot turn the statistic into NaN.
    W, p = shapiro(df[col].dropna())
    interpretation = "fail to reject H0" if p > 0.05 else "reject H0"
    shapiro_results_list.append({
        'Variable': col,
        'W_statistic': W,
        'p_value': p,
        'Interpretation': interpretation
        })

shapiro_results = pd.DataFrame(shapiro_results_list)
print("Shapiro-Wilk Test Results:")
print(shapiro_results)

In [None]:
# Histogram of variable1 (15 bins) with a KDE curve overlaid
plt.figure(figsize=(10, 5))
sns.histplot(data=df, x='variable1', bins=15, kde=True).set(
    title="Histogram of variable1 with Density Curve",
    xlabel="variable1",
    ylabel="Frequency",
)
plt.show()

In [None]:
# Histogram of variable2 (15 bins) with a KDE curve overlaid
plt.figure(figsize=(10, 5))
sns.histplot(data=df, x='variable2', bins=15, kde=True).set(
    title="Histogram of variable2 with Density Curve",
    xlabel="Variable2",
    ylabel="Frequency",
)
plt.show()

In [None]:
# Kruskal-Wallis H test: compare variable1 across the Strand groups
groups_variable1 = [scores.values for _, scores in df.groupby('Strand')['variable1']]
H_variable1, p_variable1 = kruskal(*groups_variable1)

# Decision at the 0.05 significance level
if p_variable1 < 0.05:
    interpretation_variable1 = "reject H0"
else:
    interpretation_variable1 = "fail to reject H0"

# Single-row results table
kruskal_variable1_table = pd.DataFrame([{
    'Variable': 'variable1',
    'H_statistic': H_variable1,
    'p_value': p_variable1,
    'Interpretation': interpretation_variable1,
}])

print("Kruskal-Wallis Test Results - variable1:")
print(kruskal_variable1_table)

In [None]:
# Kruskal-Wallis H test: compare variable2 across the Strand groups
groups_variable2 = [scores.values for _, scores in df.groupby('Strand')['variable2']]
H_variable2, p_variable2 = kruskal(*groups_variable2)

# Decision at the 0.05 significance level
if p_variable2 < 0.05:
    interpretation_variable2 = "reject H0"
else:
    interpretation_variable2 = "fail to reject H0"

# Single-row results table
kruskal_variable2_table = pd.DataFrame([{
    'Variable': 'variable2',
    'H_statistic': H_variable2,
    'p_value': p_variable2,
    'Interpretation': interpretation_variable2,
}])

print("Kruskal-Wallis Test Results - variable2:")
print(kruskal_variable2_table)

In [None]:
#post-hoc analysis variable 1
# Dunn's test: pairwise comparisons between strands, a common follow-up to a
# Kruskal-Wallis test. The result is a Strand x Strand table of p-values.
# Bonferroni adjustment is used here; change p_adjust (e.g. 'holm') if the
# study calls for a different correction.
posthoc_variable1 = sp.posthoc_dunn(
    df.dropna(subset=['variable1', 'Strand']),
    val_col='variable1',
    group_col='Strand',
    p_adjust='bonferroni',
)

print("Dunn's Post-hoc Test (Bonferroni-adjusted p-values) - variable1:")
print(posthoc_variable1)

In [None]:
#post-hoc analysis variable 2
# Dunn's test: pairwise comparisons between strands, a common follow-up to a
# Kruskal-Wallis test. The result is a Strand x Strand table of p-values.
# Bonferroni adjustment is used here; change p_adjust (e.g. 'holm') if the
# study calls for a different correction.
posthoc_variable2 = sp.posthoc_dunn(
    df.dropna(subset=['variable2', 'Strand']),
    val_col='variable2',
    group_col='Strand',
    p_adjust='bonferroni',
)

print("Dunn's Post-hoc Test (Bonferroni-adjusted p-values) - variable2:")
print(posthoc_variable2)

In [None]:
# Spearman rank correlation between variable1 and variable2
rho, pval = spearmanr(df['variable1'], df['variable2'])

# Significance decision at the 0.05 level
if pval < 0.05:
    significance = "Significant correlation"
else:
    significance = "No significant correlation"

# Strength bands on |rho|, highest first: (lower bound, label if rho > 0, label otherwise)
strength_bands = [
    (0.70, "Very Strong Positive", "Very Strong Negative"),
    (0.40, "Strong Positive", "Strong Negative"),
    (0.30, "Moderate Positive", "Moderate Negative"),
    (0.20, "Weak Positive", "Weak Negative"),
]
# The first band whose bound |rho| reaches decides the label; below 0.20 the
# fallback label is used.
strength = next(
    (
        positive if rho > 0 else negative
        for lower_bound, positive, negative in strength_bands
        if abs(rho) >= lower_bound
    ),
    "Negligible correlation",
)

# Single-row results table
spearman_table = pd.DataFrame([{
    'Spearman_rho': rho,
    'p_value': pval,
    'Significance': significance,
    'Strength': strength,
}])

print("Spearman Correlation Results:")
print(spearman_table)

In [None]:
# Scatter of variable1 against variable2; the title reports the Spearman
# statistics (rho, pval, strength) computed in the correlation cell.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='variable1', y='variable2', color='teal').set(
    title=f"Scatter Plot: variable1 vs variable2\nSpearman rho={rho:.3f}, p={pval:.3f}, {strength}",
    xlabel="variable1",
    ylabel="variable2",
)

# Overlay a fitted regression line only (the points are already drawn above)
sns.regplot(
    data=df,
    x='variable1',
    y='variable2',
    scatter=False,
    color='orange',
    line_kws={"linewidth": 1.5},
)

plt.show()