# Analysis of Scientific Articles

## 1️⃣ Initial Data Exploration

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the article screening results workbook into a DataFrame.
# The path is relative to the directory the notebook is run from.
df = pd.read_excel(io='../data/df_articles_results.xlsx')

# Report how many rows (articles) were loaded.
num_articles = len(df.index)
print(f'Total number of articles: {num_articles}')

# Tally each distinct value in the GlobalInclusion column.
# NOTE: value_counts() skips missing values by default, so these counts can
# add up to less than the total printed above.
inclusion_column = df['GlobalInclusion']
inclusion_counts = inclusion_column.value_counts()
print(inclusion_counts)

## 2️⃣ Visualizations

### 📊 Number of Articles by Study Type

In [None]:
# Count how many articles carry each distinct study-design label
# (value_counts() orders the labels from most to least frequent).
design_counts = df['mistral.small.24b.instruct.2501_study_design'].value_counts()

# Bar chart: one bar per study-design label.
plt.figure(figsize=(10, 5))
design_counts.plot(kind='bar')
plt.xlabel("Study Type")
plt.ylabel("Number of Articles")
plt.title("Distribution of Study Types")
# Rotated labels are centre-anchored by default, which shifts long labels away
# from their bars; right-align them so each label ends under its own bar.
plt.xticks(rotation=45, ha='right')
# Recompute margins so the slanted labels are not clipped at the figure edge.
plt.tight_layout()
plt.show()

### 🥧 Inclusion Proportion

In [None]:
# Pie chart of the GlobalInclusion value counts, drawn on an explicit
# square figure; each wedge is annotated with its percentage (one decimal).
fig, ax = plt.subplots(figsize=(6, 6))
inclusion_counts.plot.pie(ax=ax, autopct='%1.1f%%')
ax.set_title("Proportion of Included/Excluded Articles")
# Blank out the y-axis label so only the title and wedge labels are shown.
ax.set_ylabel('')
plt.show()

### 📈 Histogram of Agreement Between Models

In [None]:
# Histogram of the PairwiseAgreement column, split into 10 bins and drawn
# on an explicit axes; black bar edges keep adjacent bins distinguishable.
fig, ax = plt.subplots(figsize=(8, 5))
df['PairwiseAgreement'].hist(ax=ax, bins=10, edgecolor='black')
ax.set_xlabel("Agreement Value")
ax.set_ylabel("Number of Articles")
ax.set_title("Distribution of Agreement Between Models")
plt.show()

## 3️⃣ Analysis of Model Agreement

In [None]:
# Average the PairwiseAgreement column (Series.mean() skips missing values
# by default) and print it rounded to two decimals.
agreement_scores = df['PairwiseAgreement']
pairwise_mean = agreement_scores.mean()
print(f'Mean agreement between models: {pairwise_mean:.2f}')