<a href="https://colab.research.google.com/github/alyssafoglia/Attendance-AI/blob/main/TrainingAttendanceAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import re
import itertools
from datetime import datetime
from google.colab import files

# Upload the Excel file
uploaded = files.upload()
if not uploaded:
    # A cancelled upload returns nothing. Without this guard file_path would
    # be undefined (NameError) or, in a re-run notebook, silently keep the
    # path from an earlier run and load the wrong report.
    raise ValueError("No file was uploaded; upload an attendance report and run this cell again.")

# When several files are uploaded at once, the last one is the one loaded.
file_path = list(uploaded)[-1]

# Load Excel file and skip initial metadata rows
df = pd.read_excel(file_path, skiprows=9)

# Normalize names
def normalize_name(name):
    """Return a tidied display name, or None when *name* is not a string.

    Parenthesised fragments are removed and runs of whitespace are collapsed
    to single spaces. When the result contains a comma, the text before the
    first comma and the text after it are swapped and joined with a space
    (so "Last, First" becomes "First Last").
    """
    if not isinstance(name, str):
        return None

    without_parens = re.sub(r'\([^)]*\)', '', name)
    collapsed = re.sub(r'\s+', ' ', without_parens).strip()

    # Only the first comma is significant; anything after it stays together.
    before, comma, after = collapsed.partition(',')
    if not comma:
        return collapsed
    return f"{after.strip()} {before.strip()}"

# Derived columns. They are added in this order, which is also the order in
# which they appear in any later export of the frame.
df["Name_clean"] = df["Name"].apply(normalize_name)

if "Email" in df.columns:
    df["Email_clean"] = df["Email"].str.lower()
else:
    # No e-mail column in the sheet: keep the column present but blank.
    df["Email_clean"] = ""

# Extract team/location: the text inside the first pair of parentheses
df["Team"] = df["Name"].str.extract(r"\((.*?)\)", expand=False)

print("✅ File loaded and attendees parsed.\n")

# Summary
print("Meeting Summary")
# Every row of the sheet is counted as-is, duplicates included.
print("Total attendees:", len(df))

if 'In-Meeting Duration' in df.columns:
    # Values that cannot be parsed as a duration become NaN.
    in_meeting = pd.to_timedelta(df["In-Meeting Duration"], errors='coerce')
    df["Duration_minutes"] = in_meeting.dt.total_seconds() / 60

    avg_duration = df["Duration_minutes"].mean()
    print(f"Average duration: {avg_duration:.1f} minutes")

    print("\nTop attendees by time:")
    ranked = df.sort_values("Duration_minutes", ascending=False)
    top_attendees = ranked.head(5)
    for attendee, minutes in zip(top_attendees["Name_clean"], top_attendees["Duration_minutes"]):
        print(f" - {attendee} ({minutes:.1f} mins)")

# Q&A loop

# Words that belong to the phrasing of a question rather than to a person's
# name. Names are matched by substring, so without this filter a short word
# such as "is" matches unrelated names (e.g. "Nisswandt") and the answer is
# given for the wrong attendee. Trade-off: an attendee cannot be looked up
# using only one of these words.
_QUESTION_WORDS = frozenset({
    "a", "an", "and", "are", "at", "attend", "attended", "did", "do", "does",
    "duration", "email", "for", "here", "how", "in", "is", "join", "joined",
    "leave", "left", "long", "meeting", "of", "on", "present", "the", "there",
    "time", "to", "was", "were", "what", "when", "where", "who", "whose",
})

# "from <team>" / "list <team>". Filler words directly after the keyword are
# skipped so that "list the BCT people" resolves to BCT rather than THE.
_LOCATION_PATTERN = re.compile(
    r"\b(?:from|list)\s+(?:(?:the|all|of)\s+)*([a-zA-Z0-9&\-]+)",
    re.IGNORECASE,
)


def _extract_name_terms(question):
    """Return the lower-cased words of *question* that may be part of a name.

    A trailing possessive ("sarah's" -> "sarah") and stray quote/hyphen
    characters are stripped; question words and single letters are dropped.
    """
    terms = []
    for word in re.findall(r"[a-zA-Z'’-]+", question.lower()):
        word = re.sub(r"['’]s$", "", word).strip("'’-")
        if len(word) > 1 and word not in _QUESTION_WORDS:
            terms.append(word)
    return terms


def _find_people(frame, terms):
    """Return the rows of *frame* whose ``Name_clean`` matches *terms*.

    A row containing two of the terms (e.g. first and last name) is preferred;
    otherwise every row containing at least one term is returned. Matching is
    a case-insensitive literal substring test, and rows without a name never
    match. An empty frame is returned when there is nothing to search for.
    """
    if not terms or frame.empty:
        return frame.iloc[0:0]

    # Missing names become "" so the masks below are purely boolean.
    names = frame["Name_clean"].fillna("").str.lower()

    for first, last in itertools.combinations(terms, 2):
        both = (names.str.contains(first, regex=False)
                & names.str.contains(last, regex=False))
        if both.any():
            return frame[both]

    either = names.map(lambda n: any(term in n for term in terms)).astype(bool)
    return frame[either]


print("\nAsk a question about the attendance data (type 'exit' to stop):")
last_result = pd.DataFrame()

while True:
    try:
        question = input("\nYour question: ").strip().lower()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the cell ends the session instead of raising.
        break
    if question in ["exit", "quit"]:
        break
    if not question:
        continue

    # Summary
    if "summary" in question:
        print("Attendance Summary")
        print("Total attendees:", len(df))

        if 'Duration_minutes' in df.columns:
            avg_duration = df["Duration_minutes"].mean()
            print(f"Average duration: {avg_duration:.1f} minutes")

            print("\nTop attendees by time:")
            top_attendees = df.sort_values("Duration_minutes", ascending=False).head(5)
            for _, row in top_attendees.iterrows():
                print(f" - {row['Name_clean']} ({row['Duration_minutes']:.1f} mins)")

        team_counts = df["Team"].dropna().value_counts()
        if not team_counts.empty:
            print("\nTeam breakdown:")
            for team, count in team_counts.items():
                print(f" - {team}: {count} attendee(s)")
        else:
            print("No team information found.")
        continue

    # Location/Team-based filtering
    location_match = _LOCATION_PATTERN.search(question)
    if location_match:
        location = location_match.group(1).upper()
        # Literal (non-regex) match; rows without a team never match.
        matches = df[df["Team"].str.upper().str.contains(location, na=False, regex=False)]

        if not matches.empty:
            print(f"People from {location}:")
            for name in matches["Name_clean"].dropna().unique():
                print(f" - {name}")
            last_result = matches.copy()
        else:
            print(f"No attendees found from {location}.")
            last_result = pd.DataFrame()
        continue

    # Export result
    if "export" in question or "save" in question:
        if not last_result.empty:
            filename = input("Enter filename (without .csv): ").strip() or f"export_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
            last_result.to_csv(f"{filename}.csv", index=False)
            files.download(f"{filename}.csv")
        else:
            print("No filtered result to export.")
        continue

    # Search people by name
    found = _find_people(df, _extract_name_terms(question))

    if not found.empty:
        # When several rows match, the first one is the one reported.
        row = found.iloc[0]
        name_display = row["Name_clean"]

        if any(kw in question for kw in ["time", "join"]):
            print(f"➡️  {name_display} joined at {row.get('First Join', 'Unknown')}")
        elif "leave" in question:
            print(f"➡️  {name_display} left at {row.get('Last Leave', 'Unknown')}")
        elif any(kw in question for kw in ["duration", "how long", "long"]):
            print(f"➡️  {name_display} was in the meeting for {row.get('In-Meeting Duration', 'Unknown')}")
        elif "email" in question:
            print(f"➡️  {name_display}'s email is {row.get('Email', 'Unknown')}")
        else:
            print(f"✅ Yes, {name_display} was there.")
        last_result = found.copy()
    else:
        print("❌ Person not found or unclear question.")


Saving AWS Skillbuilder Info Session - Attendance report 10-17-24.xlsx to AWS Skillbuilder Info Session - Attendance report 10-17-24 (1).xlsx
✅ File loaded and attendees parsed.

Meeting Summary
Total attendees: 143
Average duration: 29.7 minutes

Top attendees by time:
 - Paulie (46.3 mins)
 - Paulie (46.3 mins)
 - Sarah Fabius (46.3 mins)
 - Sarah Fabius (46.3 mins)
 - Frank AWS (46.2 mins)

Ask a question about the attendance data (type 'exit' to stop):

Your question: was alyssa there
❌ Person not found or unclear question.

Your question: was sarah there
✅ Yes, Sarah Fabius was there.

Your question: what is sarah's email
➡️  Franz Nisswandt's email is Franz.Nisswandt@lexisnexisrisk.com

Your question: what is Sarah Fabius's email
➡️  Sarah Fabius's email is Sarah.Fabius@lexisnexisrisk.com

Your question: list the BCT people there
No attendees found from THE.

Your question: list BCT people 
People from BCT:
 - Sarah Fabius
 - Kenneth Fruit
 - Jimmy Codio
 - Alicia Falkowska
 - Ka

KeyboardInterrupt: Interrupted by user