In [1]:
import pandas as pd
import os
import numpy as np
#from mne.datasets.mtrf.mtrf import data_name

In [2]:
def read_file(file_path, columns=None):
    """
    Read a CSV file and return only the requested columns.

    Args:
        file_path (str): The path to the CSV file.
        columns (str | list[str] | None): Column name(s) to keep, in the
            order they should appear in the result. A single string is
            treated as one column name. When omitted, the historical
            default of ``['id', 'name', 'age']`` is used, so existing
            callers see no change.

    Returns:
        pd.DataFrame: A DataFrame containing exactly the requested columns.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file.
        ValueError: If any requested column is absent from the CSV header.
    """
    # Resolve the column selection up front so the rest of the function is
    # agnostic to how the caller expressed it.
    if columns is None:
        selected_columns = ['id', 'name', 'age']
    elif isinstance(columns, str):
        # list("id") would split the name into characters, so wrap instead.
        selected_columns = [columns]
    else:
        selected_columns = list(columns)

    # Check if the file exists
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    # Read the CSV file
    df = pd.read_csv(file_path)

    # Report every missing column at once rather than failing on the first
    missing_columns = [col for col in selected_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"The following columns are missing from the DataFrame: {missing_columns}")

    # Return the DataFrame with selected columns
    return df[selected_columns]

In [3]:
# Round-trip check for read_file: write a small CSV with one extra column
# ('city'), read it back through read_file, and show what comes out.
sample_file_path = 'sample.csv'
sample_data = dict(
    id=[1, 2, 3],
    name=['Alice', 'Bob', 'Charlie'],
    age=[25, 30, 35],
    city=['New York', 'Los Angeles', 'Chicago'],
)

# Persist the fixture without the index so the header is exactly the four keys
df = pd.DataFrame(sample_data)
df.to_csv(sample_file_path, index=False)

# Any failure is reported as a message instead of a traceback
try:
    result_df = read_file(sample_file_path)
    print(result_df)
except Exception as err:
    print(f"Error: {err}")

   id     name  age
0   1    Alice   25
1   2      Bob   30
2   3  Charlie   35


In [4]:
# Clean up the sample file after testing.
# Guarded so this cell is safe to re-run (or to run out of order): a second
# run would otherwise raise FileNotFoundError because the file is already gone.
if os.path.exists(sample_file_path):
    os.remove(sample_file_path)

<div style="text-align: center; font-size: 24px;">
    <b>Filter Age-Standardized Data
    </b>
</div>


In [7]:
# Build the 1990-vs-2021 comparison extract from the IHME GBD 2021 export.
# NOTE(review): the earlier comment pointed at GHD2021_Schizophrenia/Schiz/, but
# the code reads from the current directory ("./") - confirm the intended location.


def select_comparison_rows(gbd_df):
    """
    Select and format the rows used for the 1990 vs. 2021 comparison table.

    The input frame is not modified; a new frame is returned.

    Row filter (all conditions must hold):
        * measure_id is 2 or >= 5 (measure_id 2 is labelled "DALYs" in this
          export's measure_name column),
        * (age_id 27 with metric_id 3) or (age_id 22 with metric_id 1)
          - presumably age-standardized rate / all-ages number; confirm
          against the GBD code book,
        * year is 1990 or 2021,
        * sex_id is 3 (presumably "Both"; confirm).

    Formatting:
        * the 'upper' and 'lower' columns swap positions in the column order,
        * 'val', 'upper' and 'lower' are rendered as strings with two
          decimals.

    Args:
        gbd_df (pd.DataFrame): Frame with at least the columns measure_id,
            age_id, metric_id, year, sex_id, val, upper and lower.

    Returns:
        pd.DataFrame: The matching rows (original index labels kept), with
        the column order and formatting described above.

    Raises:
        ValueError: If 'upper' or 'lower' is not among the columns.
        KeyError: If any other required column is missing.
    """
    keep = (
        ((gbd_df['measure_id'] == 2) | (gbd_df['measure_id'] >= 5))
        & (
            ((gbd_df['age_id'] == 27) & (gbd_df['metric_id'] == 3))
            | ((gbd_df['age_id'] == 22) & (gbd_df['metric_id'] == 1))
        )
        & gbd_df['year'].isin([1990, 2021])
        & (gbd_df['sex_id'] == 3)
    )

    # Swap the 'upper' and 'lower' column locations
    columns = list(gbd_df.columns)
    upper_index = columns.index('upper')
    lower_index = columns.index('lower')
    columns[upper_index], columns[lower_index] = columns[lower_index], columns[upper_index]

    # Explicit copy: the formatting below must never write back into gbd_df
    # and must not trigger chained-assignment warnings.
    selected = gbd_df.loc[keep, columns].copy()

    # Per-column Series.map keeps this working on pandas versions that
    # predate DataFrame.map.
    for col in ('val', 'upper', 'lower'):
        selected[col] = selected[col].map("{:.2f}".format)

    return selected


file_path = "./"
# Export files to process; further batches (e.g. the
# "IHME-GBD_2021_DATA-851501aa-<n>.csv" series) can be appended here.
source_names = ["IHME-GBD_2021_DATA-575f1e63-1.csv"]

# Collect per-file results and concatenate once at the end instead of growing
# a DataFrame inside the loop.
matched_frames = []

for source_name in source_names:
    file_name = file_path + source_name
    if not os.path.isfile(file_name):
        print(f"File not found: {file_name}")
        continue

    df = pd.read_csv(file_name)
    print(f"Reading file: {file_name}")
    print(f"Columns: {df.columns}")
    print(f"First few rows:\n{df.head()}")

    filtered_df = select_comparison_rows(df)
    print(f"Filtered Prevalance rows: {filtered_df.shape[0]}")
    if not filtered_df.empty:
        matched_frames.append(filtered_df)

prevalance5_df = (
    pd.concat(matched_frames, ignore_index=True) if matched_frames else pd.DataFrame()
)

if prevalance5_df.empty:
    print("No data matched the filtering condition. The resulting DataFrame is empty.")
else:
    output_path = "new_data/comparison_1990_2021.csv"
    # to_csv does not create parent directories, so make sure it exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    prevalance5_df.to_csv(output_path, index=False)
    # Report the file that was actually written.
    print(f"{output_path} has been created successfully.")

Reading file: ./IHME-GBD_2021_DATA-575f1e63-1.csv
Columns: Index(['measure_id', 'measure_name', 'location_id', 'location_name', 'sex_id',
       'sex_name', 'age_id', 'age_name', 'cause_id', 'cause_name', 'metric_id',
       'metric_name', 'year', 'val', 'upper', 'lower'],
      dtype='object')
First few rows:
   measure_id                            measure_name  location_id  \
0           2  DALYs (Disability-Adjusted Life Years)            1   
1           2  DALYs (Disability-Adjusted Life Years)            1   
2           2  DALYs (Disability-Adjusted Life Years)            1   
3           2  DALYs (Disability-Adjusted Life Years)            1   
4           2  DALYs (Disability-Adjusted Life Years)            1   

  location_name  sex_id sex_name  age_id  age_name  cause_id     cause_name  \
0        Global       1     Male       1  <5 years       559  Schizophrenia   
1        Global       2   Female       1  <5 years       559  Schizophrenia   
2        Global       3     Bo

In [6]:
# Build the test extract (new_data/age_std_Prevalence_test.csv) from the IHME
# GBD 2021 export.
# NOTE(review): the earlier comment pointed at GHD2021_Schizophrenia/Schiz/, but
# the code reads from the current directory ("./") - confirm the intended location.


def select_prevalence_test_rows(gbd_df):
    """
    Return the rows with measure_id 5, age_id 27, sex_id 3 and metric_id 3.

    Presumably these IDs mean prevalence / age-standardized / both sexes /
    rate - confirm against the GBD code book.

    Args:
        gbd_df (pd.DataFrame): Frame with at least the columns measure_id,
            age_id, sex_id and metric_id.

    Returns:
        pd.DataFrame: The matching rows, with original index labels and
        column order preserved.
    """
    keep = (
        (gbd_df['measure_id'] == 5)
        & (gbd_df['age_id'] == 27)
        & (gbd_df['sex_id'] == 3)
        & (gbd_df['metric_id'] == 3)
    )
    return gbd_df[keep]


file_path = "./"
# Export files to process; further batches (e.g. the
# "IHME-GBD_2021_DATA-851501aa-<n>.csv" series) can be appended here.
source_names = ["IHME-GBD_2021_DATA-575f1e63-1.csv"]

# Collect per-file results and concatenate once at the end instead of growing
# a DataFrame inside the loop.
matched_frames = []

for source_name in source_names:
    file_name = file_path + source_name
    if not os.path.isfile(file_name):
        print(f"File not found: {file_name}")
        continue

    df = pd.read_csv(file_name)
    print(f"Reading file: {file_name}")
    print(f"Columns: {df.columns}")
    print(f"First few rows:\n{df.head()}")

    filtered_df = select_prevalence_test_rows(df)
    print(f"Filtered Prevalance rows: {filtered_df.shape[0]}")

    if not filtered_df.empty:
        matched_frames.append(filtered_df)

prevalance5_df = (
    pd.concat(matched_frames, ignore_index=True) if matched_frames else pd.DataFrame()
)

if prevalance5_df.empty:
    print("No data matched the filtering condition. The resulting DataFrame is empty.")
else:
    output_path = "new_data/age_std_Prevalence_test.csv"
    # to_csv does not create parent directories, so make sure it exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    prevalance5_df.to_csv(output_path, index=False)
    # Report the file that was actually written.
    print(f"{output_path} has been created successfully.")

Reading file: ./IHME-GBD_2021_DATA-575f1e63-1.csv
Columns: Index(['measure_id', 'measure_name', 'location_id', 'location_name', 'sex_id',
       'sex_name', 'age_id', 'age_name', 'cause_id', 'cause_name', 'metric_id',
       'metric_name', 'year', 'val', 'upper', 'lower'],
      dtype='object')
First few rows:
   measure_id                            measure_name  location_id  \
0           2  DALYs (Disability-Adjusted Life Years)            1   
1           2  DALYs (Disability-Adjusted Life Years)            1   
2           2  DALYs (Disability-Adjusted Life Years)            1   
3           2  DALYs (Disability-Adjusted Life Years)            1   
4           2  DALYs (Disability-Adjusted Life Years)            1   

  location_name  sex_id sex_name  age_id  age_name  cause_id     cause_name  \
0        Global       1     Male       1  <5 years       559  Schizophrenia   
1        Global       2   Female       1  <5 years       559  Schizophrenia   
2        Global       3     Bo

Filter for comparison: 1990 vs. 2021

In [None]:
# Build new_data/age_std_Prevalence.csv from the IHME GBD 2021 export.
# NOTE(review): the earlier comment pointed at GHD2021_Schizophrenia/Schiz/, but
# the code reads from the current directory ("./") - confirm the intended location.


def select_age_std_prevalence_rows(gbd_df):
    """
    Return the rows with measure_id 5 and age_id 27.

    Presumably these IDs mean prevalence / age-standardized - confirm against
    the GBD code book. No restriction is placed on sex, metric or year.

    Args:
        gbd_df (pd.DataFrame): Frame with at least the columns measure_id
            and age_id.

    Returns:
        pd.DataFrame: The matching rows, with original index labels and
        column order preserved.
    """
    keep = (gbd_df['measure_id'] == 5) & (gbd_df['age_id'] == 27)
    return gbd_df[keep]


file_path = "./"
# Export files to process; further batches (e.g. the
# "IHME-GBD_2021_DATA-851501aa-<n>.csv" series) can be appended here.
source_names = ["IHME-GBD_2021_DATA-575f1e63-1.csv"]

# Collect per-file results and concatenate once at the end instead of growing
# a DataFrame inside the loop.
matched_frames = []

for source_name in source_names:
    file_name = file_path + source_name
    if not os.path.isfile(file_name):
        print(f"File not found: {file_name}")
        continue

    df = pd.read_csv(file_name)
    print(f"Reading file: {file_name}")
    print(f"Columns: {df.columns}")
    print(f"First few rows:\n{df.head()}")

    filtered_df = select_age_std_prevalence_rows(df)
    print(f"Filtered Prevalance rows: {filtered_df.shape[0]}")

    if not filtered_df.empty:
        matched_frames.append(filtered_df)

prevalance5_df = (
    pd.concat(matched_frames, ignore_index=True) if matched_frames else pd.DataFrame()
)

if prevalance5_df.empty:
    print("No data matched the filtering condition. The resulting DataFrame is empty.")
else:
    output_path = "new_data/age_std_Prevalence.csv"
    # to_csv does not create parent directories, so make sure it exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    prevalance5_df.to_csv(output_path, index=False)
    # Report the file that was actually written.
    print(f"{output_path} has been created successfully.")