In [None]:
import os
import pandas as pd
import json
from pathlib import Path

# 1. Resolve the folder this code runs from, for scripts and notebooks alike
try:
    # A .py script defines __file__, so anchor on the script's own folder
    BASE_DIR = Path(__file__).resolve().parent
except NameError:
    # __file__ is undefined (e.g. in a notebook): use the working directory
    BASE_DIR = Path.cwd()

# 2. Build the input path:
# go one level up from BASE_DIR (expected: from 'mcp_tools_development' to
# 'ai-engeneering-study-mcp'), then down into 'data_prepared' where the
# plural-named 'events.json' lives
INPUT_FILE = BASE_DIR.parent.joinpath('data_prepared', 'events.json')

# Sanity check: report whether the input file is where we expect it
if not INPUT_FILE.exists():
    print(f"❌ Warning: Input file NOT found at {INPUT_FILE}")
    # Show the resolved BASE_DIR to help diagnose a wrong starting folder
    print(f"Current BASE_DIR: {BASE_DIR}")
else:
    print(f"✅ Setup complete. Input file found: {INPUT_FILE}")

In [None]:
# Read the raw JSON text (UTF-8) and parse it in one step
raw_data = json.loads(INPUT_FILE.read_text(encoding='utf-8'))

# 1. Build the main DataFrame from the parsed data
df = pd.DataFrame(raw_data)

# 2. Expand ONLY the 'eventGuests' column into its own small DataFrame, and
# 3. prefix every resulting column with 'eventGuests_' so its origin stays clear
guests_df = pd.json_normalize(df['eventGuests']).add_prefix("eventGuests_")

# 4. Remove the original dict column and attach the flattened columns side by side
events_df = pd.concat(
    [df.drop(columns=['eventGuests']), guests_df],
    axis=1,
)

print(f"✅ Successfully processed {events_df.shape[0]} records.")
print(f"Flattened columns: {guests_df.columns.tolist()}")

# Show the first three rows of the combined result
display(events_df.head(3))

In [None]:
# Parse the 'date' strings into datetime values; errors='coerce' turns
# anything unparseable into NaT instead of raising
events_df['date'] = pd.to_datetime(events_df['date'], errors='coerce')

# Show which columns the frame has at this point
print("--- Final Column List ---", list(events_df.columns), sep="\n")

# Confirm the dtypes of the columns the later cells rely on
print("\n--- Data Types Check ---")
checked_columns = ['date', 'eventGuests_total', 'eventGuests_going']
print(events_df.dtypes.loc[checked_columns])

display(events_df.head(5))

In [None]:
def get_num_monthly_event_stats(year_val, month_val):
    """
    Return how many events fall in the given year and month.

    Reads the module-level 'events_df'; its 'date' column must already be a
    datetime column because the '.dt' accessor is used on it.

    Args:
        year_val: Year to match against the event date.
        month_val: Month number to match against the event date.

    Returns:
        int: Number of rows whose 'date' matches both the year and the month.
    """
    event_dates = events_df['date']
    in_year = event_dates.dt.year.eq(year_val)
    in_month = event_dates.dt.month.eq(month_val)

    # Rows must satisfy both conditions; the row count is the answer
    return events_df.loc[in_year & in_month].shape[0]

def get_str_monthly_event_stats(year: int, month_name: str):
    """
    Counts events for a specific year and month, returning a Hungarian sentence.

    The month is given by its Hungarian full name or abbreviation. The lookup
    is case-insensitive and ignores dots and surrounding whitespace, so
    "Ápr", "ápr." and " április " all resolve to April.

    Reads the module-level 'events_df'; its 'date' column must already be a
    datetime column because the '.dt' accessor is used on it.

    Args:
        year: Year to match against the event date.
        month_name: Hungarian month name or abbreviation.

    Returns:
        str: A sentence reporting the count, or "Érvénytelen hónap név."
        when the month name is not recognised.
    """
    # Hungarian month names and abbreviations. "febr" and "márc" are the
    # standard abbreviations; the shorter "feb" / "már" are kept as well.
    month_map = {
        "január": 1, "jan": 1,
        "február": 2, "febr": 2, "feb": 2,
        "március": 3, "márc": 3, "már": 3,
        "április": 4, "ápr": 4,
        "május": 5, "máj": 5,
        "június": 6, "jún": 6,
        "július": 7, "júl": 7,
        "augusztus": 8, "aug": 8,
        "szeptember": 9, "szept": 9,
        "október": 10, "okt": 10,
        "november": 11, "nov": 11,
        "december": 12, "dec": 12,
    }

    # Normalise the input before the lookup: case, dots, stray whitespace
    month_label = month_name.strip()
    month_key = month_label.lower().replace(".", "").strip()
    month_num = month_map.get(month_key)
    if month_num is None:
        return "Érvénytelen hónap név."

    mask = (events_df['date'].dt.year == year) & (events_df['date'].dt.month == month_num)
    count = len(events_df[mask])

    return f"{year} {month_label} hónapjában {count} eseményt találtam."


def get_yearly_event_stats(year_val):
    """
    Return how many events fall in the given year.

    Reads the module-level 'events_df'; its 'date' column must already be a
    datetime column because the '.dt' accessor is used on it.

    Args:
        year_val: Year to match against the event date.

    Returns:
        int: Number of rows whose 'date' falls in that year.
    """
    in_year = events_df['date'].dt.year.eq(year_val)

    # Keep only the matching rows and report how many there are
    return events_df.loc[in_year].shape[0]

# Quick demo of the numeric counters; each entry is (label, counter, arguments)
for label, counter, args in (
    ("Okt 2023", get_num_monthly_event_stats, (2023, 10)),
    ("Szept 2025", get_num_monthly_event_stats, (2025, 9)),
    ("2021", get_yearly_event_stats, (2021,)),
    ("2020", get_yearly_event_stats, (2020,)),
):
    print(f"Events in {label}: {counter(*args)}")

# Demo of the name-based variant, which returns a ready-made sentence
print(get_str_monthly_event_stats(2023, "Ápr"))

In [None]:
def get_event_count(year_val, month_val=None):
    """
    Counts events for a specific year, and optionally a specific month.

    Reads the module-level 'events_df'; its 'date' column must already be a
    datetime column because the '.dt' accessor is used on it.

    Args:
        year_val: Year to match against the event date.
        month_val: Optional month. Either a number from 1 to 12, or a
            Hungarian month name/abbreviation (e.g. "December", "dec.",
            "Márc"). String lookup is case-insensitive and ignores dots and
            surrounding whitespace. Only the Hungarian names in the map
            below are recognised. None (the default) counts the whole year.

    Returns:
        str: "Events in <year>: <count>" or "Events in <year> <month>: <count>",
        or an error message starting with "⚠️ Error:" when the month is not
        a recognised name or not a number in the range 1-12.
    """
    # 1. Hungarian month names and abbreviations. "febr" and "márc" are the
    #    standard abbreviations; the shorter "feb" / "már" are kept as well.
    month_map = {
        "január": 1, "jan": 1,
        "február": 2, "febr": 2, "feb": 2,
        "március": 3, "márc": 3, "már": 3,
        "április": 4, "ápr": 4,
        "május": 5, "máj": 5,
        "június": 6, "jún": 6,
        "július": 7, "júl": 7,
        "augusztus": 8, "aug": 8,
        "szeptember": 9, "szept": 9,
        "október": 10, "okt": 10,
        "november": 11, "nov": 11,
        "december": 12, "dec": 12,
    }

    # Start with a mask and a label for the year alone
    mask = (events_df['date'].dt.year == year_val)
    label = f"{year_val}"

    # 2. If a month is provided, validate it and narrow the mask
    if month_val is not None:
        if isinstance(month_val, str):
            # Normalise the text before the lookup: case, dots, whitespace
            month_key = month_val.lower().replace(".", "").strip()
            month_num = month_map.get(month_key)
            if month_num is None:
                return f"⚠️ Error: '{month_val}' is not a valid month name."
            month_label = month_val.strip()
        else:
            # Reject 0, 13, etc. explicitly: they would otherwise match no
            # rows and be reported as a misleading count of 0.
            try:
                in_range = 1 <= month_val <= 12
            except TypeError:
                in_range = False
            if not in_range:
                return f"⚠️ Error: '{month_val}' is not a valid month number (expected 1-12)."
            month_num = month_val
            month_label = month_val

        mask = mask & (events_df['date'].dt.month == month_num)
        label = f"{year_val} {month_label}"

    count = len(events_df[mask])

    # The label is built without trailing padding, so no space precedes the colon
    return f"Events in {label}: {count}"

# --- Exercise the merged function ---
# In order: year + integer month, year + string month, year only
for call_args in ((2023, 12), (2025, "Feb"), (2025,)):
    print(get_event_count(*call_args))