# Pharmacy Claims Analysis
This notebook demonstrates how to analyze synthetic pharmacy claims data with Python in order to identify cost trends and assess medication adherence.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

## Load Data

In [None]:
# Read the claims file; `fill_date` is parsed into datetimes at load time
# because the later cells use date arithmetic and the `.dt` accessor on it.
df = pd.read_csv(
    'pharmacy_claims.csv',
    parse_dates=['fill_date'],
)

# Preview the first rows as a quick sanity check of the load.
df.head()

## Feature Engineering
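Calculate the refill gap for each fill (days without medication on hand between consecutive fills) and each member's medication possession ratio (MPR): the total days of medication supplied divided by the number of days in the member's observation period.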

In [None]:
# Sort by member and fill_date so that, within each member, shift(1) returns
# the chronologically previous fill.
df = df.sort_values(by=['member_id', 'fill_date'])

# Calculate refill gap: the number of days between the date the PREVIOUS
# fill's supply ran out (prev_fill + previous days_supply) and the current
# fill date. The previous fill's days_supply is the one that matters here;
# the current fill's supply only affects the gap to the *next* fill.
# NOTE(review): fills are grouped per member only. If the data holds several
# drugs per member, gaps and MPR mix them together - confirm whether a drug
# identifier should be added to the grouping keys.
prev_by_member = df.groupby('member_id')[['fill_date', 'days_supply']].shift(1)
df['prev_fill'] = prev_by_member['fill_date']
days_between_fills = (df['fill_date'] - df['prev_fill']).dt.days
# Early refills (negative gap) count as no gap. A member's first fill has no
# previous fill (NaN) and is likewise reported as a gap of 0.
df['refill_gap_days'] = (
    (days_between_fills - prev_by_member['days_supply'])
    .clip(lower=0)
    .fillna(0)
)

# MPR calculation: total days supplied / days in the observation period.
# Aggregations are given by name ('min', 'max', ...) because passing the
# Python builtins to .agg() is deprecated in recent pandas releases.
member_summary = df.groupby('member_id').agg(
    total_days_supplied=('days_supply', 'sum'),
    first_fill=('fill_date', 'min'),
    last_fill=('fill_date', 'max'),
    # df is sorted by fill_date within member, so 'last' is the supply of the
    # most recent fill (the last non-null value).
    last_days_supply=('days_supply', 'last'),
).reset_index()

# The observation period runs from the first fill until the supply of the
# last fill is exhausted. Using the actual days_supply of the last fill
# (instead of a fixed 30) keeps the ratio correct for 60/90-day fills.
# pop() keeps the helper column out of the final summary table.
last_days_supply = member_summary.pop('last_days_supply')
member_summary['period'] = (
    (member_summary['last_fill'] - member_summary['first_fill']).dt.days
    + last_days_supply
)

# A non-positive period (e.g. a single fill with days_supply == 0) makes the
# ratio undefined; report NaN instead of inf so downstream plots stay valid.
valid_period = member_summary['period'].where(member_summary['period'] > 0)
member_summary['MPR'] = member_summary['total_days_supplied'] / valid_period
member_summary.head()

## Cost Summary

In [None]:
# Work on a copy of the claims that carries an extra `month` column holding
# the calendar month (a monthly pandas Period) of each fill date.
monthly_costs = df.assign(month=df['fill_date'].dt.to_period('M'))

# Total claim cost per month; the month is then converted to its string form
# (e.g. '2023-01') in the resulting two-column table.
monthly_summary = (
    monthly_costs
    .groupby('month')['claim_cost']
    .sum()
    .reset_index()
    .assign(month=lambda summary: summary['month'].astype(str))
)
monthly_summary

## Visualizations

In [None]:
# Line chart of total claim cost per month, one marker per month.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(
    data=monthly_summary,
    x='month',
    y='claim_cost',
    marker='o',
    ax=ax,
)
ax.set_title('Total Claim Cost by Month')
ax.set_ylabel('Cost ($)')
# Tilt the month labels so they do not overlap along the x axis.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Histogram of member-level MPR in 10 bins, with a kernel density overlay.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(
    data=member_summary,
    x='MPR',
    bins=10,
    kde=True,
    ax=ax,
)
ax.set_title('Distribution of Member MPR (Medication Adherence)')
ax.set_xlabel('MPR')
plt.show()