In [14]:
import os
import json
import pandas as pd
from collections import Counter
from itertools import combinations
from scipy.stats import chi2_contingency

# Function to read data from a file
def read_file(file_path):
    """Load and return the parsed JSON content of a file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # locale encoding, which would corrupt non-ASCII text on systems whose
    # default is not UTF-8.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Function to extract relevant variables from the data
def extract_variables(data):
    """Build a flat record of the analysed fields from one loaded entry.

    Args:
        data: Mapping supporting ``.get(key, default)`` (here, the object
            loaded from one JSON file).

    Returns:
        A dict with one entry per output name listed below. Each value is
        whatever ``data`` holds under the corresponding source key, or the
        string ``'No'`` when that key is absent.
    """
    # (output name, source key) pairs; order defines the key order of the
    # returned dict.
    field_map = (
        ('Headache', 'isHeadache'),
        ('Dizziness', 'isDizzy'),
        ('Nausea', 'isNausea'),
        ('Noise_sensitivity', 'isSensitiveToNoise'),
        ('Sleep_disturbance', 'hasTroubleSleeping'),
        ('Fatigue', 'isFatigued'),
        ('Irritated', 'isIrritable'),
        ('Depressed', 'isFeelingDown'),
        ('Has_Concussion', 'isConcussion'),
    )
    return {label: data.get(source_key, 'No') for label, source_key in field_map}

# Function to perform statistical analysis
def perform_statistical_analysis(data):
    """Run a chi-square test of 'Has_Concussion' against all other columns.

    The records are loaded into a DataFrame and cross-tabulated with
    'Has_Concussion' as rows and the combination of every other column as
    columns; that table is passed to ``scipy.stats.chi2_contingency``.
    The input, the DataFrame and the table are printed along the way.

    Args:
        data: Input accepted by ``pd.DataFrame`` (the caller passes a list
            of dicts) that yields a 'Has_Concussion' column.

    Returns:
        Tuple ``(chi2, p)``: the test statistic and its p-value.
    """
    print("Data received for analysis:")
    print(data)

    frame = pd.DataFrame(data)
    print("DataFrame created from data:")
    print(frame)

    # Every column except the outcome becomes one level of the table's
    # column index.
    outcome = frame['Has_Concussion']
    other_columns = [
        frame[name] for name in frame.columns if name != 'Has_Concussion'
    ]
    table = pd.crosstab(outcome, columns=other_columns)
    print("Crosstab created:")
    print(table)

    # Only the statistic and the p-value are returned; degrees of freedom
    # and expected frequencies are discarded.
    statistic, p_value, *_ = chi2_contingency(table)
    return statistic, p_value

# Path to the folder containing the data files.
# Written as a raw string: the previous literal mixed '\\' with unrecognised
# escapes (\S, \F, \D) that only worked because Python keeps them verbatim,
# and that newer Python versions warn about. The resulting value is unchanged.
folder_path = r'C:\USF\Semester2\SmartAndConnectedHealth\FinalProject\DiagHistory'

all_data = []

# Loop through each JSON file in the folder. The listing is sorted because
# os.listdir returns entries in arbitrary order, which would make the
# "file 1..5" preview below differ between runs.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".json"):
        continue
    file_path = os.path.join(folder_path, filename)
    # Reuse the file's own loader rather than duplicating open/json.load.
    all_data.append(extract_variables(read_file(file_path)))

# Fail with a clear message instead of an opaque KeyError from the analysis
# step when the folder holds no JSON files.
if not all_data:
    raise FileNotFoundError(f"No .json files found in {folder_path}")

# Print the first few entries to verify
for i, record in enumerate(all_data[:5]):
    print(f"Data from file {i+1}:")
    print(record)
    print()

# Perform statistical analysis
chi2, p = perform_statistical_analysis(all_data)
print("Chi-square value:", chi2)
print("P-value:", p)


Data from file 1:
{'Headache': 'Yes', 'Dizziness': 'Yes', 'Nausea': 'Yes', 'Noise_sensitivity': 'Yes', 'Sleep_disturbance': 'Yes', 'Fatigue': 'Yes', 'Irritated': 'Yes', 'Depressed': 'Yes', 'Has_Concussion': 'Yes'}

Data from file 2:
{'Headache': 'Yes', 'Dizziness': 'Yes', 'Nausea': 'Yes', 'Noise_sensitivity': 'No', 'Sleep_disturbance': 'No', 'Fatigue': 'No', 'Irritated': 'No', 'Depressed': 'No', 'Has_Concussion': 'Yes'}

Data from file 3:
{'Headache': 'Yes', 'Dizziness': 'Yes', 'Nausea': 'Yes', 'Noise_sensitivity': 'Yes', 'Sleep_disturbance': 'No', 'Fatigue': 'Yes', 'Irritated': 'No', 'Depressed': 'Yes', 'Has_Concussion': 'Yes'}

Data from file 4:
{'Headache': 'Yes', 'Dizziness': 'Yes', 'Nausea': 'No', 'Noise_sensitivity': 'Yes', 'Sleep_disturbance': 'Yes', 'Fatigue': 'No', 'Irritated': 'Yes', 'Depressed': 'Yes', 'Has_Concussion': 'Yes'}

Data from file 5:
{'Headache': 'Yes', 'Dizziness': 'Yes', 'Nausea': 'No', 'Noise_sensitivity': 'No', 'Sleep_disturbance': 'No', 'Fatigue': 'No', 'Irr