<a href="https://colab.research.google.com/github/azragundogan/DSA210_Project/blob/main/analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind

# Render figures inline when running inside IPython/Jupyter.
# `get_ipython` only exists in an IPython session; in a plain Python
# interpreter the lookup raises NameError and the setup is skipped.
# The magic is invoked through the shell API instead of the
# `%matplotlib inline` syntax so that this file also parses as ordinary
# Python (the `%` form is a SyntaxError outside IPython).
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# ----------------------------------------------------
# 1. LOAD THE DATA
# ----------------------------------------------------
# Change the path below if the workbook has a different file name.
df = pd.read_excel("DSA 210-Dataset.xlsx")

print("İlk satırlar:")
print(df.head())
print("\nBilgi:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() would emit a stray
# "None" line after the report.
df.info()
print("\nEksik değer sayıları:")
print(df.isna().sum())

# ----------------------------------------------------
# 2. DERIVED VARIABLES (for H1)
# ----------------------------------------------------
# Entertainment apps: Instagram + TikTok + YouTube
df["Entertainment_min"] = (
    df["Instagram_min"].add(df["TikTok_min"]).add(df["YouTube_min"])
)

# Communication app: WhatsApp
df["Communication_min"] = df["WhatsApp_min"]

# Share of total screen time spent on entertainment apps.
# Rows whose total is not strictly positive get a NaN denominator, so the
# ratio is NaN there instead of a division-by-zero result.
screen_total = df["Total_Screen_min"]
df["Entertainment_ratio"] = (
    df["Entertainment_min"] / screen_total.where(screen_total > 0)
)

preview_columns = [
    "Participant_ID", "Day",
    "WhatsApp_min", "Instagram_min", "TikTok_min", "YouTube_min",
    "Entertainment_min", "Communication_min", "Entertainment_ratio",
    "Total_Screen_min", "Total_Social_min", "Exam_day_or_not",
]
print("\nYeni değişkenlerle ilk satırlar:")
print(df[preview_columns].head())

# ----------------------------------------------------
# 3. SUMMARY STATISTICS
# ----------------------------------------------------
# Columns included in the descriptive-statistics table.
stat_columns = [
    "Total_Screen_min",
    "Total_Social_min",
    "Entertainment_min",
    "Communication_min",
    "Entertainment_ratio",
]
descriptive_stats = df[stat_columns].describe()

print("\nTemel özet istatistikler:")
print(descriptive_stats)

# ----------------------------------------------------
# 4. EXAM-DAY vs NORMAL-DAY AVERAGES
# ----------------------------------------------------
# Metrics averaged separately for each value of Exam_day_or_not.
mean_columns = [
    "Entertainment_min",
    "Communication_min",
    "Total_Screen_min",
    "Total_Social_min",
    "Entertainment_ratio",
]
rows_by_day_type = df.groupby("Exam_day_or_not")
group_means = rows_by_day_type[mean_columns].mean()

print("\nExam_day_or_not=0 (normal), 1 (sınav) ortalamalar:")
print(group_means)

# ----------------------------------------------------
# 5. CHARTS
# ----------------------------------------------------
labels = ["Non-exam days", "Exam days"]

# One bar chart per metric: (column of group_means, y-axis label, title).
# Order: entertainment apps, WhatsApp (communication), total screen time.
chart_specs = [
    (
        "Entertainment_min",
        "Average entertainment minutes",
        "Entertainment apps (Instagram + TikTok + YouTube)\nExam vs non-exam days",
    ),
    (
        "Communication_min",
        "Average communication minutes (WhatsApp)",
        "Communication app (WhatsApp)\nExam vs non-exam days",
    ),
    (
        "Total_Screen_min",
        "Average total screen time (min)",
        "Total screen time\nExam vs non-exam days",
    ),
]

for metric_column, y_axis_label, chart_title in chart_specs:
    group_means[metric_column].plot(kind="bar", rot=0, figsize=(5, 4))
    # Relabel the two bar positions; this assumes both groups are present
    # and appear in 0 (non-exam), 1 (exam) order in group_means.
    plt.xticks([0, 1], labels)
    plt.ylabel(y_axis_label)
    plt.title(chart_title)
    plt.tight_layout()
    plt.show()

# ----------------------------------------------------
# 6. HYPOTHESIS TEST (t-test for H1)
# ----------------------------------------------------
# Row masks for the exam (1) and non-exam (0) groups.
is_exam_day = df["Exam_day_or_not"] == 1
is_normal_day = df["Exam_day_or_not"] == 0

ent_exam = df.loc[is_exam_day, "Entertainment_min"]
ent_non = df.loc[is_normal_day, "Entertainment_min"]

com_exam = df.loc[is_exam_day, "Communication_min"]
com_non = df.loc[is_normal_day, "Communication_min"]

print("\nGrup boyutları:")
print("Entertainment - exam:", len(ent_exam), " non-exam:", len(ent_non))
print("Communication - exam:", len(com_exam), " non-exam:", len(com_non))

# Entertainment usage: two-sample t-test without assuming equal variances
# (equal_var=False).
t_ent, p_ent = ttest_ind(ent_exam, ent_non, equal_var=False)
print("\nEntertainment minutes - t-test sonucu")
print("t-statistic:", t_ent)
print("p-value    :", p_ent)

# WhatsApp usage: same test on the communication minutes.
t_com, p_com = ttest_ind(com_exam, com_non, equal_var=False)
print("\nWhatsApp minutes - t-test sonucu")
print("t-statistic:", t_com)
print("p-value    :", p_com)

# ----------------------------------------------------
# 7. SUMMARY TABLE FOR THE REPORT
# ----------------------------------------------------
# One record per group; the means come from the exam / non-exam series
# built for the t-tests plus the per-group total screen time.
exam_flag = df["Exam_day_or_not"]
summary_rows = [
    {
        "Group": "Non-exam",
        "Entertainment_mean": ent_non.mean(),
        "Communication_mean": com_non.mean(),
        "Total_screen_mean": df.loc[exam_flag == 0, "Total_Screen_min"].mean(),
    },
    {
        "Group": "Exam",
        "Entertainment_mean": ent_exam.mean(),
        "Communication_mean": com_exam.mean(),
        "Total_screen_mean": df.loc[exam_flag == 1, "Total_Screen_min"].mean(),
    },
]
summary = pd.DataFrame(
    summary_rows,
    columns=[
        "Group",
        "Entertainment_mean",
        "Communication_mean",
        "Total_screen_mean",
    ],
)

print("\nÖzet tablo (rapora koymalık):")
print(summary)