In [38]:
import pandas as pd

df = pd.read_csv("MiniProject.csv")
df.head()

Unnamed: 0,visits,helpfulness,satisfaction,energy,standing,diet
0,1-2 times,Helpful,5,3,Sophomore,2
1,1-2 times,Neither Helpful nor Unhelpful,3,3,Junior,3
2,"0, I don't go to the SRC","N/A, never interacted with SRC staff",3,3,Junior,3
3,"0, I don't go to the SRC",Helpful,3,2,Senior,3
4,1-2 times,"N/A, never interacted with SRC staff",3,2,Sophomore,2


In [26]:
import pandas as pd
from scipy.stats import chi2_contingency, pearsonr, spearmanr

data = {
    "visits": [
        "0, I don't go to the SRC",
        "1-2 times",
        "3-4 times",
        "5 or more times"
    ],
    2: [1, 1, 0, 0],
    3: [21, 4, 1, 0],
    4: [6, 6, 0, 3],
    5: [4, 6, 2, 2]
}

df = pd.DataFrame(data)

df_long = df.melt(id_vars="visits", var_name="satisfaction", value_name="count")

records = []
visit_mapping = {
    "0, I don't go to the SRC": 0,
    "1-2 times": 1,
    "3-4 times": 2,
    "5 or more times": 3
}

for _, row in df_long.iterrows():
    records.extend([
        (visit_mapping[row["visits"]], row["satisfaction"])
    ] * int(row["count"]))

cor_df = pd.DataFrame(records, columns=["Visit Frequency", "Satisfaction Score"])

contingency_table = pd.crosstab(df_long['visits'], df_long['satisfaction'], values=df_long['count'], aggfunc='sum').fillna(0)
chi2_stat, p_val, dof, expected = chi2_contingency(contingency_table)

print("Chi-Square Test:")
print(f"Chi2 Statistic = {chi2_stat:.2f}, p-value = {p_val:.5f}, Degrees of Freedom = {dof}")

pearson_corr, _ = pearsonr(cor_df["Visit Frequency"], cor_df["Satisfaction Score"])

print("\nCorrelation Test:")
print(f"Pearson Correlation = {pearson_corr:.2f}")

Chi-Square Test:
Chi2 Statistic = 17.08, p-value = 0.04749, Degrees of Freedom = 9

Correlation Tests:
Pearson Correlation = 0.41


In [24]:
#crosstab is a pivottable that holds counts
#Determined chi square and Correlation between satisfaction and the number of visits. 
visit_to_satisfaction = pd.crosstab(df.visits, df.satisfaction)
visit_to_satisfaction 

satisfaction,2,3,4,5
visits,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"0, I don't go to the SRC",1,21,6,4
1-2 times,1,4,6,6
3-4 times,0,1,0,2
5 or more times,0,0,3,2


In [34]:
import pandas as pd
from scipy.stats import chi2_contingency, pearsonr, spearmanr


data = {
    "Helpfulness": [
        "Helpful",
        "N/A, never interacted with SRC staff",
        "Neither Helpful nor Unhelpful",
        "Unhelpful",
        "Very Helpful"
    ],
    "0, I don't go to the SRC": [6, 20, 5, 0, 1],
    "1-2 times": [7, 5, 4, 0, 1],
    "3-4 times": [1, 0, 1, 0, 1],
    "5 or more times": [0, 0, 4, 1, 0]
}

df = pd.DataFrame(data)

contingency_table = df.set_index("Helpfulness")
chi2_stat, p_val, dof, expected = chi2_contingency(contingency_table)
print("Chi-Square Test:")
print(f"Chi2 Statistic = {chi2_stat:.2f}, p-value = {p_val:.5f}, Degrees of Freedom = {dof}")

helpfulness_mapping = {
    "N/A, never interacted with SRC staff": 0,
    "Unhelpful": 1,
    "Neither Helpful nor Unhelpful": 2,
    "Helpful": 3,
    "Very Helpful": 4
}

records = []
for i, row in df.iterrows():
    helpfulness_score = helpfulness_mapping[row["Helpfulness"]]
    for j, visits in enumerate(row[1:]):
        visit_score = j  # 0 to 3 (ordinal visit frequency)
        count = row[1:][j]
        records.extend([(visit_score, helpfulness_score)] * count)

correlation_df = pd.DataFrame(records, columns=["VisitFreq", "HelpfulnessScore"])

pearson_corr, _ = pearsonr(correlation_df["VisitFreq"], correlation_df["HelpfulnessScore"])

print("\nCorrelation Tests:")
print(f"Pearson Correlation = {pearson_corr:.2f}")

Chi-Square Test:
Chi2 Statistic = 33.37, p-value = 0.00085, Degrees of Freedom = 12

Correlation Tests:
Pearson Correlation = 0.31


  count = row[1:][j]


In [40]:
visits_to_helpfulness = pd.crosstab(df.visits, df.helpfulness)
visits_to_helpfulness

helpfulness,Helpful,"N/A, never interacted with SRC staff",Neither Helpful nor Unhelpful,Unhelpful,Very Helpful
visits,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"0, I don't go to the SRC",6,20,5,0,1
1-2 times,7,5,4,0,1
3-4 times,1,0,1,0,1
5 or more times,0,0,4,1,0


In [46]:
import pandas as pd
from scipy.stats import chi2_contingency, pearsonr

data = {
    "Standing": ["Freshman", "Graduate", "Junior", "Senior", "Sophomore"],
    "0, I don't go to the SRC": [2, 1, 10, 14, 5],
    "1-2 times": [0, 1, 6, 3, 7],
    "3-4 times": [0, 0, 2, 1, 0],
    "5 or more times": [0, 0, 2, 2, 1]
}

df = pd.DataFrame(data)

contingency_table = df.set_index("Standing")
chi2_stat, p_val, dof, expected = chi2_contingency(contingency_table)
print("Chi-Square Test:")
print(f"Chi2 Statistic = {chi2_stat:.2f}, p-value = {p_val:.5f}, Degrees of Freedom = {dof}")

standing_mapping = {
    "Freshman": 0,
    "Sophomore": 1,
    "Junior": 2,
    "Senior": 3,
    "Graduate": 4
}

records = []
for i, row in df.iterrows():
    standing_score = standing_mapping[row["Standing"]]
    for j, count in enumerate(row[1:]):
        visit_score = j  # visit frequency levels: 0 to 3
        records.extend([(visit_score, standing_score)] * count)

correlation_df = pd.DataFrame(records, columns=["VisitFreq", "StandingScore"])


pearson_corr, _ = pearsonr(correlation_df["VisitFreq"], correlation_df["StandingScore"])

print("\nPearson Correlation:")
print(f"Pearson Correlation = {pearson_corr:.2f}")
print("Can't reject null hypothesis")


Chi-Square Test:
Chi2 Statistic = 9.29, p-value = 0.67827, Degrees of Freedom = 12

Pearson Correlation:
Pearson Correlation = -0.03
Can't reject null hypothesis


In [50]:
import pandas as pd

df = pd.read_csv("MiniProject.csv")
df.head()
standing_to_visits = pd.crosstab(df.standing, df.visits)
standing_to_visits

visits,"0, I don't go to the SRC",1-2 times,3-4 times,5 or more times
standing,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Freshman,2,0,0,0
Graduate,1,1,0,0
Junior,10,6,2,2
Senior,14,3,1,2
Sophomore,5,7,0,1


In [52]:
import pandas as pd
from scipy.stats import chi2_contingency, pearsonr

data = {
    "Visit": [
        "0, I don't go to the SRC",
        "1-2 times",
        "3-4 times",
        "5 or more times"
    ],
    1: [2, 1, 0, 0],
    2: [9, 2, 2, 2],
    3: [16, 7, 0, 3],
    4: [5, 6, 1, 0],
    5: [0, 1, 0, 0]
}

df = pd.DataFrame(data)

# Chi-Square Test
contingency_table = df.set_index("Visit")
chi2_stat, p_val, dof, expected = chi2_contingency(contingency_table)
print("Chi-Square Test:")
print(f"Chi2 Statistic = {chi2_stat:.2f}, p-value = {p_val:.5f}, Degrees of Freedom = {dof}")

visit_mapping = {
    "0, I don't go to the SRC": 0,
    "1-2 times": 1,
    "3-4 times": 2,
    "5 or more times": 3
}

records = []
for i, row in df.iterrows():
    visit_score = visit_mapping[row["Visit"]]
    for j, count in enumerate(row[1:]):
        energy_score = j + 1  # energy levels 1 through 5
        records.extend([(visit_score, energy_score)] * count)

correlation_df = pd.DataFrame(records, columns=["VisitFreq", "EnergyScore"])

pearson_corr, _ = pearsonr(correlation_df["VisitFreq"], correlation_df["EnergyScore"])

print("\nPearson Correlation:")
print(f"Pearson Correlation = {pearson_corr:.2f}")

Chi-Square Test:
Chi2 Statistic = 11.62, p-value = 0.47651, Degrees of Freedom = 12

Pearson Correlation:
Pearson Correlation = 0.01


In [56]:
import pandas as pd

df = pd.read_csv("MiniProject.csv")
df.head()

visits_to_energy = pd.crosstab(df.visits, df.energy)
visits_to_energy

energy,1,2,3,4,5
visits,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"0, I don't go to the SRC",2,9,16,5,0
1-2 times,1,2,7,6,1
3-4 times,0,2,0,1,0
5 or more times,0,2,3,0,0


In [1]:
import pandas as pd
from scipy.stats import chi2_contingency, pearsonr


data = {
    'standing': ['Freshman', 'Graduate', 'Junior', 'Senior', 'Sophomore'],
    'diet': [2.5, 3.0, 3.25, 2.75, 3.307692]
}

df = pd.DataFrame(data)


ordinal_map = {'Freshman': 1, 'Sophomore': 2, 'Junior': 3, 'Senior': 4, 'Graduate': 5}
df['standing_num'] = df['standing'].map(ordinal_map)


pearson_corr, pearson_p = pearsonr(df['standing_num'], df['diet'])


df['diet_cat'] = pd.qcut(df['diet'], q=3, labels=['Low', 'Medium', 'High'])
contingency = pd.crosstab(df['standing'], df['diet_cat'])
chi2, p, dof, expected = chi2_contingency(contingency)


print("Chi-Square Test:")
print(f"Chi2 Statistic = {chi2:.2f}, p-value = {p:.5f}, Degrees of Freedom = {dof}\n")

print("Correlation Test:")
print(f"Pearson Correlation = {pearson_corr:.2f}")

Chi-Square Test:
Chi2 Statistic = 10.00, p-value = 0.26503, Degrees of Freedom = 8

Correlation Test:
Pearson Correlation = 0.21


In [3]:
#this is for standing to diet level mean
standing_to_diet_mean = pd.pivot_table(df, index = "standing", values ="diet", aggfunc = "mean")
standing_to_diet_mean

Unnamed: 0_level_0,diet
standing,Unnamed: 1_level_1
Freshman,2.5
Graduate,3.0
Junior,3.25
Senior,2.75
Sophomore,3.307692


In [5]:
import pandas as pd
from scipy.stats import chi2_contingency, pearsonr

data = {
    1: [2, 1, 0, 0],
    2: [9, 2, 2, 2],
    3: [16, 7, 0, 3],
    4: [5, 6, 1, 0],
    5: [0, 1, 0, 0]
}

index = ["0, I don't go to the SRC", "1-2 times", "3-4 times", "5 or more times"]
df = pd.DataFrame(data, index=index)


chi2, p, dof, expected = chi2_contingency(df)

print("Chi-Square Test:")
print(f"Chi2 Statistic = {chi2:.2f}, p-value = {p:.5f}, Degrees of Freedom = {dof}\n")

visit_levels = [0, 1, 2, 3]  # Corresponding to the index
energy_levels = [1, 2, 3, 4, 5]

visits_list = []
energy_list = []

for i, visit_level in enumerate(visit_levels):
    for j, energy_level in enumerate(energy_levels):
        count = df.iloc[i, j]
        visits_list.extend([visit_level] * count)
        energy_list.extend([energy_level] * count)

pearson_corr, pearson_p = pearsonr(visits_list, energy_list)

print("Correlation Test:")
print(f"Pearson Correlation = {pearson_corr:.2f}, p-value = {pearson_p:.5f}")

Chi-Square Test:
Chi2 Statistic = 11.62, p-value = 0.47651, Degrees of Freedom = 12

Correlation Test:
Pearson Correlation = 0.01, p-value = 0.91345
