In [1]:
import sys
import os
import warnings
warnings.filterwarnings('ignore')

# Directory added to the import path before the project-local imports below
# (presumably where csv_analyzer_agent and data_processor live — confirm).
# HEALTHCARE_AGENTS_PATH overrides the default so the notebook can run on
# machines that do not share this Windows directory layout.
project_path = os.environ.get(
    'HEALTHCARE_AGENTS_PATH',
    r'D:\agenic_arhitecture\mcp\healthcare_data_analyzer\agents',
)
# Guarded append keeps sys.path free of duplicates when the cell is re-run.
if project_path not in sys.path:
    sys.path.append(project_path)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML

from csv_analyzer_agent import FreeCSVAnalyzer
from data_processor import FreeDataProcessor

# Reaching this line means the imports above all resolved.
print("✅ Modules imported successfully")

# Instantiate the two project helpers once; later cells reuse these names
# (`processor` to load the CSV, `analyzer` to answer the query).
# NOTE(review): the recorded output suggests FreeCSVAnalyzer() downloads and
# loads a local text-generation model on construction — confirm in
# csv_analyzer_agent.
analyzer = FreeCSVAnalyzer()
processor = FreeDataProcessor()

print("✅ Analyzer and processor initialized")


✅ Modules imported successfully
🔄 Initializing free local models...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


✅ Free text generator loaded successfully!
✅ Free Data Processor initialized
✅ Analyzer and processor initialized


In [2]:
# Data locations. The directory is defined once so the sample file and a
# user-supplied file always resolve against the same folder.
data_dir = r'D:\agenic_arhitecture\mcp\data'
file_path = os.path.join(data_dir, 'your_data_file.csv')

create_sample = True  # Set to False and update file_path to use your own CSV

if create_sample:
    print("🔧 Creating sample healthcare tweet data for testing...")
    n_records = 1000
    # Seeded generator: the synthetic data, and every result derived from it,
    # is identical on each Restart & Run All.
    rng = np.random.default_rng(42)
    users = [f'user_{i:03d}' for i in range(100)]
    sample_data = {
        'polarity_of_tweet': rng.choice([0, 2, 4], n_records),
        'id_of_the_tweet': range(1000000, 1000000 + n_records),
        # A Timedelta step gives one tweet every two hours without relying on
        # the 'H' frequency alias, which pandas 2.2 deprecates.
        'date_of_the_tweet': pd.date_range(
            '2024-01-01', periods=n_records, freq=pd.Timedelta(hours=2)
        ),
        'query': ['NO_QUERY'] * n_records,
        'user': rng.choice(users, n_records),
        'text_of_the_tweet': [f'Sample tweet {i}' for i in range(n_records)]
    }
    df_sample = pd.DataFrame(sample_data)
    os.makedirs(data_dir, exist_ok=True)
    sample_path = os.path.join(data_dir, 'sample_healthcare_tweets.csv')
    df_sample.to_csv(sample_path, index=False)
    # From here on the notebook analyses the generated sample file.
    file_path = sample_path
    print(f"✅ Sample data created at {sample_path}")

print(f"\n📂 Loading data file: {file_path}")
result = processor.process_file(file_path)

# `df` stays None on failure so downstream cells can skip their work.
if result.get('success'):
    df = result['dataframe']
    print(f"✅ Data loaded; shape: {df.shape}")
    display(df.head())
else:
    print(f"❌ Failed to load data: {result.get('error')}")
    df = None


🔧 Creating sample healthcare tweet data for testing...
✅ Sample data created at D:\agenic_arhitecture\mcp\data\sample_healthcare_tweets.csv

📂 Loading data file: D:\agenic_arhitecture\mcp\data\sample_healthcare_tweets.csv
✅ CSV loaded successfully with utf-8 encoding
🧹 Data cleaned: (1000, 6) → (1000, 6)
✅ Data loaded; shape: (1000, 6)


Unnamed: 0,polarity_of_tweet,id_of_the_tweet,date_of_the_tweet,query,user,text_of_the_tweet
0,4,1000000,2024-01-01 00:00:00,NO_QUERY,user_090,Sample tweet 0
1,0,1000001,2024-01-01 02:00:00,NO_QUERY,user_061,Sample tweet 1
2,2,1000002,2024-01-01 04:00:00,NO_QUERY,user_077,Sample tweet 2
3,2,1000003,2024-01-01 06:00:00,NO_QUERY,user_015,Sample tweet 3
4,2,1000004,2024-01-01 08:00:00,NO_QUERY,user_024,Sample tweet 4


In [3]:
# Ask the analyzer a natural-language question about the loaded CSV and print
# whatever parts of the answer it returned.
if df is not None:
    user_query = "What's the max count of tweets per user per day"
    print(f"🤖 Running analysis for query: {user_query}")

    analysis_result = analyzer.analyze_csv(file_path, user_query)
    # .get() throughout: a partially populated result reports what it has
    # instead of aborting the cell with a KeyError, matching how the load
    # cell reads the processor result.
    if analysis_result.get('success'):
        print("📊 Analysis Results:")
        for i, insight in enumerate(analysis_result.get('analysis') or [], 1):
            print(f"{i}. {insight}")

        print("\n💻 Generated Python code:")
        print(analysis_result.get('code') or '')

        # Render the HTML report only when one was actually produced.
        html_output = analysis_result.get('html_output')
        if html_output:
            display(HTML(html_output))
    else:
        print(f"❌ Analysis failed: {analysis_result.get('error')}")
else:
    print("⚠️ No data loaded, skipping analysis")


🤖 Running analysis for query: What's the max count of tweets per user per day
✅ File loaded with utf-8 encoding
📊 Analysis Results:
1. 📈 Maximum tweets per user per day: 3
2. 👥 Users with maximum tweets on specific days:
3.   • user_032 on 2024-03-01: 3 tweets
4.   • user_066 on 2024-03-23: 3 tweets
5. 📊 Total users: 100
6. 📊 Total tweets: 1000
7. 📊 Average tweets per user: 10.00

💻 Generated Python code:

# Generated code for analysis: What's the max count of tweets per user per day
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the CSV file (update path as needed)
df = pd.read_csv('your_file.csv', encoding='utf-8')

# Basic information
print("📊 Dataset shape:", df.shape)
print("📊 Columns:", df.columns.tolist())
print("📊 Data types:")
print(df.dtypes)

# Analysis based on your query

# Tweet analysis: Find max tweets per user per day
if 'user' in df.columns:
    # Find date column
    date_col = None
    for col in df.columns:
        if 'date' in col.l

In [4]:
def analyze_healthcare_tweets(df: "pd.DataFrame | None", top_n: int = 10) -> None:
    """Print the largest number of tweets a single user posted on one day.

    The user and date columns are the first columns whose label contains
    'user' and 'date' respectively (case-insensitive). Dates that cannot be
    parsed are coerced to NaT and therefore drop out of the grouping.

    The input frame is not modified: the per-day keys are computed on a
    temporary Series rather than written back as new columns.

    Args:
        df: Frame to analyse, or None (a message is printed and nothing else
            happens).
        top_n: Maximum number of (user, day) pairs listed for the maximum
            count. Defaults to 10.

    Returns:
        None. All results are printed.
    """
    if df is None:
        print("❌ No data for analysis.")
        return

    # str() guards against non-string column labels (e.g. integers).
    user_col = next((c for c in df.columns if 'user' in str(c).lower()), None)
    date_col = next((c for c in df.columns if 'date' in str(c).lower()), None)

    if user_col is None or date_col is None:
        print("⚠️ Missing columns for tweet analysis. Found:", df.columns.tolist())
        return

    # Calendar day of each tweet, kept local so the caller's frame is untouched.
    tweet_day = pd.to_datetime(df[date_col], errors='coerce').dt.date
    daily_counts = df.groupby([df[user_col], tweet_day]).size()

    # With no valid rows, max() would be NaN and the report would be misleading.
    if daily_counts.empty:
        print("⚠️ No rows with a valid user and date; nothing to count.")
        return

    max_tweets = daily_counts.max()
    max_users = daily_counts[daily_counts == max_tweets]

    print(f"📈 Max tweets per user per day: {max_tweets}")
    print("👥 Users with max tweets:")
    for (user, date), count in max_users.head(top_n).items():
        print(f" - {user} on {date}: {count} tweets")

# Run the tweet analysis only when the load cell produced a DataFrame
# (`df` is set to None there when loading fails).
if df is not None:
    analyze_healthcare_tweets(df)


📈 Max tweets per user per day: 3
👥 Users with max tweets:
 - user_032 on 2024-03-01: 3 tweets
 - user_066 on 2024-03-23: 3 tweets
