# User Behavior Analysis

Analyze user behavior patterns from activity logs

Data source node: Loads user information from a CSV file (`users.csv`)

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: data_source
# @node_id: load_user_data
# @execution_status: validated
# @name: Load User Data
# ===== End of system-managed metadata =====

import pandas as pd

# Read the user table from the CSV file into a DataFrame.
user_data = pd.read_csv(filepath_or_buffer='users.csv')

# Report how many rows were read.
print(f"Loaded {len(user_data)} users")

In [None]:
# @node_id: load_user_data
# @result_format: parquet
import os

import pandas as pd

# Location of the persisted parquet output for the load_user_data node.
result_path = r'../projects/test_user_behavior_analysis/parquets/load_user_data.parquet'

if not os.path.exists(result_path):
    # Nothing exists at that path: report it instead of raising.
    print(f"Result file not found: {result_path}")
else:
    # Read the stored result and show it; `display` is not imported here and
    # is expected to be supplied by the notebook environment.
    load_user_data = pd.read_parquet(result_path)
    display(load_user_data)

Data source node: Loads user activity events

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: data_source
# @node_id: load_activity_data
# @execution_status: validated
# @name: Load Activity Data
# ===== End of system-managed metadata =====

import pandas as pd

# Read the activity log from the CSV file into a DataFrame.
activity_data = pd.read_csv(filepath_or_buffer='activity.csv')

# Report how many rows were read.
print(f"Loaded {len(activity_data)} activity records")

In [None]:
# @node_id: load_activity_data
# @result_format: parquet
import os

import pandas as pd

# Location of the persisted parquet output for the load_activity_data node.
result_path = r'../projects/test_user_behavior_analysis/parquets/load_activity_data.parquet'

if not os.path.exists(result_path):
    # Nothing exists at that path: report it instead of raising.
    print(f"Result file not found: {result_path}")
else:
    # Read the stored result and show it; `display` is not imported here and
    # is expected to be supplied by the notebook environment.
    load_activity_data = pd.read_parquet(result_path)
    display(load_activity_data)

Compute node: Joins user and activity datasets

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: compute
# @node_id: merge_datasets
# @execution_status: validated
# @depends_on: [load_user_data, load_activity_data]
# @name: Merge Datasets
# ===== End of system-managed metadata =====

import pandas as pd

# Left-join the activity records onto the user table, matching on user_id.
# With how='left' every row of user_data is kept, including users that have
# no matching row in activity_data.
merged_data = pd.merge(user_data, activity_data, how='left', on='user_id')

# Report the (rows, columns) shape of the joined table.
print(f"Merged dataset shape: {merged_data.shape}")

In [None]:
# @node_id: merge_datasets
# @result_format: parquet
import os

import pandas as pd

# Location of the persisted parquet output for the merge_datasets node.
result_path = r'../projects/test_user_behavior_analysis/parquets/merge_datasets.parquet'

if not os.path.exists(result_path):
    # Nothing exists at that path: report it instead of raising.
    print(f"Result file not found: {result_path}")
else:
    # Read the stored result and show it; `display` is not imported here and
    # is expected to be supplied by the notebook environment.
    merge_datasets = pd.read_parquet(result_path)
    display(merge_datasets)

Compute node: Calculates summary statistics

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: compute
# @node_id: compute_statistics
# @execution_status: validated
# @depends_on: [merge_datasets]
# @name: Compute Statistics
# ===== End of system-managed metadata =====

import pandas as pd

# Calculate summary statistics for the merged user/activity data.
#
# merged_data comes from a left merge of user_data with activity_data on
# 'user_id'. A user's columns are therefore repeated on every one of their
# activity rows, and a user without any activity still contributes one row.
# Per-user and per-activity figures cannot be read straight off the raw rows.

# One row per distinct user_id, so each user counts once in per-user figures.
# NOTE(review): assumes 'age' is a per-user column from user_data - confirm.
users_once = merged_data.drop_duplicates(subset='user_id')

# Rows whose user_id also occurs in activity_data carry a real activity
# record; the remaining rows are placeholders for users with no activity.
# NOTE(review): assumes user_id is unique within user_data - confirm.
activity_rows = merged_data['user_id'].isin(activity_data['user_id'])

statistics = {
    'total_users': len(merged_data['user_id'].unique()),
    # Count only rows backed by an activity record, not the placeholder rows.
    'total_activities': int(activity_rows.sum()),
    # Mean over distinct users, not weighted by how many activities each has.
    'avg_age': users_once['age'].mean(),
    # Sum of the 'premium' column divided by the number of user rows.
    'premium_ratio': (user_data['premium'].sum() / len(user_data))
}
print(statistics)

In [None]:
# @node_id: compute_statistics
# @result_format: parquet
import os

import pandas as pd

# Location of the persisted parquet output for the compute_statistics node.
result_path = r'../projects/test_user_behavior_analysis/parquets/compute_statistics.parquet'

if not os.path.exists(result_path):
    # Nothing exists at that path: report it instead of raising.
    print(f"Result file not found: {result_path}")
else:
    # Read the stored result and show it; `display` is not imported here and
    # is expected to be supplied by the notebook environment.
    compute_statistics = pd.read_parquet(result_path)
    display(compute_statistics)

Compute node: Generates final analysis report

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: compute
# @node_id: generate_report
# @execution_status: validated
# @depends_on: [compute_statistics]
# @name: Generate Report
# ===== End of system-managed metadata =====

import pandas as pd

# Build the final report record. All three fields are fixed literals;
# in particular 'generated_at' is a hard-coded date string, not the current date.
report = dict(
    title='User Behavior Analysis Report',
    generated_at='2024-11-07',
    status='completed',
)
print(report)

In [None]:
# @node_id: generate_report
# @result_format: parquet
import os

import pandas as pd

# Location of the persisted parquet output for the generate_report node.
result_path = r'../projects/test_user_behavior_analysis/parquets/generate_report.parquet'

if not os.path.exists(result_path):
    # Nothing exists at that path: report it instead of raising.
    print(f"Result file not found: {result_path}")
else:
    # Read the stored result and show it; `display` is not imported here and
    # is expected to be supplied by the notebook environment.
    generate_report = pd.read_parquet(result_path)
    display(generate_report)