**Obesity research in specialty journals from 2000 to 2023: A bibliometric analysis**

In [None]:
import os
import pandas as pd
from google.colab import drive

In [None]:
# Attach Google Drive to the Colab runtime so its files can be reached by path
mount_point = '/content/drive'
drive.mount(mount_point)

# Drive folder that the later cells read the gender CSVs from and write results to
path_dir = '/content/drive/My Drive/DATASETS/OBESITY/'

Mounted at /content/drive


In [None]:
# CSV files to summarise: the full 2000-2023 span first, then each sub-period
filenames = [
    'gender_results_2000-2023.csv',
    'gender_results_2016-2023.csv',
    'gender_results_2008-2015.csv',
    'gender_results_2000-2007.csv'
]

# For every file, print the mean and standard deviation of each count column
for filename in filenames:
    df = pd.read_csv(os.path.join(path_dir, filename))

    print(f"\nStatistics for {filename}:")
    for column in ('male', 'female', 'undetermined'):
        # A missing column is reported instead of raising, so the
        # remaining columns and files are still summarised
        if column not in df.columns:
            print(f"  Column '{column}' not found in {filename}")
            continue

        values = df[column]
        print(f"  {column} - Mean: {values.mean():.2f}, Std Dev: {values.std():.2f}")


Statistics for gender_results_2000-2023.csv:
  male - Mean: 3.23, Std Dev: 2.83
  female - Mean: 2.49, Std Dev: 2.31
  undetermined - Mean: 0.28, Std Dev: 0.70

Statistics for gender_results_2016-2023.csv:
  male - Mean: 3.39, Std Dev: 3.35
  female - Mean: 2.93, Std Dev: 2.67
  undetermined - Mean: 0.33, Std Dev: 0.76

Statistics for gender_results_2008-2015.csv:
  male - Mean: 3.15, Std Dev: 2.42
  female - Mean: 2.41, Std Dev: 2.01
  undetermined - Mean: 0.25, Std Dev: 0.68

Statistics for gender_results_2000-2007.csv:
  male - Mean: 2.98, Std Dev: 2.06
  female - Mean: 1.62, Std Dev: 1.55
  undetermined - Mean: 0.20, Std Dev: 0.56


In [None]:
# Collect one summary record per period; the DataFrame is built once at the end
results = []

for file in filenames:
    # The period label is what remains of the filename once the fixed
    # prefix and the extension are stripped
    period = file.replace("gender_results_", "").replace(".csv", "")

    df = pd.read_csv(os.path.join(path_dir, file))

    # Column totals over every row of the file
    total_male, total_female, total_und = (
        df[name].sum() for name in ('male', 'female', 'undetermined')
    )

    # Total authors across the three categories
    total = total_male + total_female + total_und

    # Each category's share of the total, in percent; when the total is not
    # positive all three shares are reported as 0 to avoid dividing by zero
    if total > 0:
        pct_male = total_male / total * 100
        pct_female = total_female / total * 100
        pct_und = total_und / total * 100
    else:
        pct_male = pct_female = pct_und = 0

    # Percentages are rounded to two decimals for the report
    row = {
        'Period': period,
        'Total Authors': total,
        'Male (%)': round(pct_male, 2),
        'Female (%)': round(pct_female, 2),
        'Undetermined (%)': round(pct_und, 2)
    }
    results.append(row)

# Turn the collected records into a table and show it
results_df = pd.DataFrame(results)
print(results_df)

      Period  Total Authors  Male (%)  Female (%)  Undetermined (%)
0  2000-2023         180329     53.80       41.58              4.62
1  2016-2023          89680     50.95       44.05              5.00
2  2008-2015          63183     54.29       41.48              4.23
3  2000-2007          27466     61.99       33.75              4.26


In [None]:
# Write the per-period percentage table into the same Drive folder as the
# input CSVs; index=False keeps the DataFrame's row index out of the file
output_name = "gender_percentages_by_period.csv"
output_file = os.path.join(path_dir, output_name)
results_df.to_csv(output_file, index=False)