In [37]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import json
import pickle
import warnings
# Suppress every Python warning for the rest of the session. Note that this
# also hides deprecation notices raised by the imported libraries.
warnings.filterwarnings('ignore')

# Confirmation banner so the cell has visible output when it finishes.
print("‚úÖ Libraries imported successfully!")

‚úÖ Libraries imported successfully!


In [38]:
class SmartFinanceAI:
    """
    Model for personal-finance analysis.

    Features (their implementations are attached to the class in later cells):
    - Spending prediction (linear regression)
    - Anomaly detection (statistical methods)
    - Budget recommendation
    - Spending pattern analysis
    """

    def __init__(self):
        """Create an untrained model whose statistics containers are all empty."""
        self.is_trained = False
        # Every container gets its own fresh dict; they are populated by
        # training or by loading a saved model.
        for attr in ('spending_patterns', 'category_stats', 'monthly_stats',
                     'income_stats', 'model_weights'):
            setattr(self, attr, {})

    def load_data(self, filepath='/kaggle/input/smart-personal-finance-analyst-dataset/financial_transactions.csv'):
        """
        Read a transactions CSV and convert its ``date`` column to datetimes.

        Prints the path being read and the number of rows loaded, then
        returns the resulting DataFrame.
        """
        print(f"Loading data from {filepath}...")
        transactions = pd.read_csv(filepath)
        transactions['date'] = pd.to_datetime(transactions['date'])
        print(f"Loaded {len(transactions)} transactions")
        return transactions

In [39]:
    def train(self, data):
        """
        Fit the model on a table of transactions.

        Computes per-category expense statistics, per-month expense and income
        aggregates, income statistics, a least-squares line through the monthly
        expense totals (month index -> total) and each category's share of
        total spending, then marks the model as trained.

        All learned state is rebuilt from scratch on every call, so training
        again on a different dataset does not keep categories from an earlier
        run. The input frame is not modified.

        Parameters
        ----------
        data : pandas.DataFrame
            Must provide the columns ``date`` (datetime dtype, e.g. as returned
            by ``load_data``), ``type`` (rows equal to ``'expense'`` or
            ``'income'`` are used), ``category`` and ``amount``.

        Returns
        -------
        SmartFinanceAI
            ``self``, so the call can be chained.
        """
        print("\n" + "="*60)
        print("TRAINING SMART FINANCE AI MODEL")
        print("="*60)

        # assign() builds a new frame, so the caller's DataFrame does not
        # silently gain helper columns.
        frame = data.assign(month=data['date'].dt.to_period('M'))

        expenses = frame[frame['type'] == 'expense']
        income = frame[frame['type'] == 'income']

        print(f"\nTraining Data Overview:")
        print(f"   Total transactions: {len(data)}")
        print(f"   Expenses: {len(expenses)}")
        print(f"   Income: {len(income)}")
        print(f"   Date range: {data['date'].min()} to {data['date'].max()}")

        # 1. Per-category statistics, collected in a fresh dict so nothing
        #    from a previous training run survives.
        print(f"\nAnalyzing spending patterns...")
        category_stats = {}
        for category in expenses['category'].unique():
            cat_data = expenses.loc[expenses['category'] == category, 'amount']

            category_stats[category] = {
                'mean': float(cat_data.mean()),
                'std': float(cat_data.std()),  # sample std; NaN for a single transaction
                'median': float(cat_data.median()),
                'min': float(cat_data.min()),
                'max': float(cat_data.max()),
                'count': int(len(cat_data)),
                'total': float(cat_data.sum()),
                'q25': float(cat_data.quantile(0.25)),
                'q75': float(cat_data.quantile(0.75))
            }
        self.category_stats = category_stats

        # 2. Monthly aggregates (groupby returns the months in sorted order).
        print(f"Analyzing monthly trends...")
        monthly_expense = expenses.groupby('month')['amount'].agg(['sum', 'mean', 'count', 'std'])
        monthly_income = income.groupby('month')['amount'].sum()
        expense_by_month = monthly_expense.to_dict('index')

        self.monthly_stats = {
            'expense': expense_by_month,
            'income': monthly_income.to_dict()
        }

        # 3. Income statistics.
        print(f"Analyzing income patterns...")
        self.income_stats = {
            'avg_monthly': float(monthly_income.mean()),
            'total': float(income['amount'].sum()),
            'sources': income['category'].value_counts().to_dict()
        }

        # 4. Least-squares line through (month index, monthly expense total).
        #    The totals are reused from the groupby above instead of
        #    re-scanning the frame once per month.
        print(f"Training prediction model...")
        months = list(expense_by_month)
        y = np.array([row['sum'] for row in expense_by_month.values()], dtype=float)
        x = np.arange(len(months), dtype=float)

        if months:
            x_mean = x.mean()
            y_mean = y.mean()
            numerator = ((x - x_mean) * (y - y_mean)).sum()
            denominator = ((x - x_mean) ** 2).sum()
            # A single month has no spread in x; fall back to a flat line.
            slope = numerator / denominator if denominator != 0 else 0.0
            intercept = y_mean - slope * x_mean
        else:
            # No expense rows at all: keep the weights finite instead of NaN.
            slope, intercept = 0.0, 0.0

        self.model_weights = {
            'slope': float(slope),
            'intercept': float(intercept),
            'n_months': len(months)
        }

        # 5. Category weights: each category's share of total spending.
        total_spending = float(expenses['amount'].sum())
        self.spending_patterns = {
            cat: float(stats['total'] / total_spending) if total_spending else 0.0
            for cat, stats in category_stats.items()
        }

        self.is_trained = True

        print(f"\nModel training completed!")
        print(f"   Categories trained: {len(self.category_stats)}")
        print(f"   Months analyzed: {len(months)}")

        return self

In [40]:
    def predict_next_month_spending(self):
        """
        Extrapolate the fitted monthly-spending line one month ahead.

        Returns
        -------
        dict or None
            ``None`` when the model has not been trained. Otherwise a dict with:

            - ``predicted_amount``: ``slope * n_months + intercept``
            - ``trend``: ``'increasing'``, ``'decreasing'``, or ``'stable'``
              when the slope is exactly zero
            - ``trend_strength``: ``|slope|`` relative to ``|intercept|``
            - ``confidence``: heuristic score clamped to ``[0.6, 0.95]``
            - ``range``: prediction minus/plus one (population) standard
              deviation of the historical monthly totals
        """
        if not self.is_trained:
            return None

        slope = self.model_weights['slope']
        intercept = self.model_weights['intercept']
        n = self.model_weights['n_months']

        # Training indexes the months 0..n-1, so index n is the next month.
        next_month_spending = slope * n + intercept

        monthly_totals = [m['sum'] for m in self.monthly_stats['expense'].values()]
        historical_std = float(np.std(monthly_totals)) if monthly_totals else 0.0

        # Confidence shrinks as the historical spread grows relative to the
        # forecast. That ratio only makes sense for a positive forecast: with a
        # zero or negative one it would divide by zero or be clipped up to the
        # ceiling, so the floor is used instead.
        if next_month_spending > 0:
            confidence = max(0.6, min(0.95, 1 - (historical_std / next_month_spending)))
        else:
            confidence = 0.6

        if slope > 0:
            trend = 'increasing'
        elif slope < 0:
            trend = 'decreasing'
        else:
            trend = 'stable'

        # abs() keeps the ratio non-negative and the denominator away from
        # zero when the intercept is negative.
        trend_strength = abs(slope) / (abs(intercept) + 1e-6)

        return {
            'predicted_amount': float(next_month_spending),
            'trend': trend,
            'trend_strength': float(trend_strength),
            'confidence': float(confidence),
            'range': {
                'min': float(next_month_spending - historical_std),
                'max': float(next_month_spending + historical_std)
            }
        }

In [41]:
    def detect_anomalies(self, transactions, threshold=2.5):
        """
        Flag unusual expense transactions using a per-category Z-score.

        Every row whose ``type`` is ``'expense'`` and whose category has
        trained statistics is scored as ``(amount - mean) / (std + 1e-6)``.
        Rows with ``|z| > threshold`` are reported, labelled ``'high'`` when
        ``|z| > 3`` and ``'medium'`` otherwise.

        Returns a list of report dicts sorted by ``|z|``, largest first.
        """
        flagged = []

        for _, row in transactions.iterrows():
            if row['type'] != 'expense':
                continue

            category = row['category']
            amount = row['amount']
            if category not in self.category_stats:
                continue

            stats = self.category_stats[category]

            # The small epsilon keeps the division defined when std is zero.
            z = (amount - stats['mean']) / (stats['std'] + 1e-6)

            # Written as "not >" so a NaN score is skipped as well.
            if not abs(z) > threshold:
                continue

            if abs(z) > 3:
                level = 'high'
            else:
                level = 'medium'

            flagged.append(dict(
                date=str(row['date']),
                transaction_id=row.get('transaction_id', 'N/A'),
                category=category,
                amount=float(amount),
                expected_mean=float(stats['mean']),
                expected_range=f"Rp {stats['q25']:,.0f} - Rp {stats['q75']:,.0f}",
                z_score=float(z),
                severity=level,
                message=f"Unusual {category} expense of Rp {amount:,.0f} (typically Rp {stats['mean']:,.0f})",
            ))

        # Most extreme deviation first.
        flagged.sort(key=lambda item: abs(item['z_score']), reverse=True)
        return flagged

In [42]:
    def recommend_budget(self):
        """
        Recommend a monthly budget per category using the 50/30/20 rule.

        Average monthly income is split into 50% needs, 30% wants and 20%
        savings. Each bucket is then divided among the categories that belong
        to it, in proportion to their historical totals. Categories that are
        neither needs nor wants (Investment, Education and anything unlisted)
        share the savings bucket, and the proportions within every bucket are
        computed over exactly the categories assigned to it, so they always
        sum to 1.

        Returns
        -------
        dict or None
            ``None`` when the model has not been trained. Otherwise a mapping
            ``category -> {'recommended_monthly', 'current_average',
            'difference', 'status'}`` where ``status`` is ``'under_budget'``
            when the current average is below the recommendation and
            ``'over_budget'`` otherwise.
        """
        if not self.is_trained:
            return None

        avg_monthly_income = self.income_stats['avg_monthly']
        n_months = self.model_weights['n_months']

        needs_categories = {'Food', 'Transport', 'Bills', 'Health'}
        wants_categories = {'Entertainment', 'Shopping'}

        # Rule: 50/30/20 budget rule adjusted
        # 50% needs, 30% wants, 20% savings
        group_budget = {
            'needs': avg_monthly_income * 0.50,
            'wants': avg_monthly_income * 0.30,
            'savings': avg_monthly_income * 0.20,
        }

        def group_of(category):
            """Return the budget bucket a category belongs to."""
            if category in needs_categories:
                return 'needs'
            if category in wants_categories:
                return 'wants'
            return 'savings'

        # Total historical spending per bucket, computed once up front over
        # the categories that actually fall into each bucket.
        group_total = dict.fromkeys(group_budget, 0.0)
        for category, stats in self.category_stats.items():
            group_total[group_of(category)] += stats['total']

        recommendations = {}
        for category, stats in self.category_stats.items():
            group = group_of(category)
            bucket_total = group_total[group]

            # A bucket with no recorded spending gets a zero share rather
            # than a division by zero.
            proportion = stats['total'] / bucket_total if bucket_total else 0.0
            recommended = group_budget[group] * proportion

            avg_actual = stats['total'] / n_months if n_months else 0.0

            recommendations[category] = {
                'recommended_monthly': float(recommended),
                'current_average': float(avg_actual),
                'difference': float(recommended - avg_actual),
                'status': 'under_budget' if avg_actual < recommended else 'over_budget'
            }

        return recommendations

In [43]:
   def generate_insights(self):
        """
        Build a list of actionable insights from the trained statistics.

        Up to three insights are produced, in this order:

        1. the category with the largest share of spending (skipped when no
           spending shares are available),
        2. the savings rate, reported as low (< 20%) or good (skipped when the
           average monthly income is missing, zero or negative, because the
           rate is undefined then),
        3. a warning when the predicted spending trend is increasing.

        Returns
        -------
        list of dict
            Each insight has the keys ``type``, ``priority``, ``title``,
            ``message`` and ``recommendation``. An untrained model returns
            an empty list.
        """
        if not self.is_trained:
            return []

        insights = []

        # 1. Highest spending category
        if self.spending_patterns:
            top_category = max(self.spending_patterns, key=self.spending_patterns.get)
            top_percentage = self.spending_patterns[top_category] * 100

            insights.append({
                'type': 'spending_pattern',
                'priority': 'high',
                'title': 'Top Spending Category',
                'message': f"'{top_category}' accounts for {top_percentage:.1f}% of your total expenses",
                'recommendation': f"Review your {top_category} transactions for potential savings opportunities"
            })

        # 2. Savings rate
        monthly_totals = [m['sum'] for m in self.monthly_stats['expense'].values()]
        avg_monthly_expense = np.mean(monthly_totals) if monthly_totals else 0.0
        avg_monthly_income = self.income_stats['avg_monthly']

        # Without positive, finite income the rate would be NaN or infinite,
        # and a NaN rate would fail the "< 20" check and be praised as good.
        if np.isfinite(avg_monthly_income) and avg_monthly_income > 0:
            savings_rate = ((avg_monthly_income - avg_monthly_expense) / avg_monthly_income) * 100

            if savings_rate < 20:
                insights.append({
                    'type': 'savings',
                    'priority': 'high',
                    'title': 'Low Savings Rate',
                    'message': f"Your savings rate is {savings_rate:.1f}%, below the recommended 20%",
                    'recommendation': "Consider reducing discretionary spending and automating savings"
                })
            else:
                insights.append({
                    'type': 'savings',
                    'priority': 'low',
                    'title': 'Good Savings Rate',
                    'message': f"Great job! Your savings rate is {savings_rate:.1f}%",
                    'recommendation': "Maintain this healthy financial habit"
                })

        # 3. Spending trend
        prediction = self.predict_next_month_spending()
        if prediction and prediction['trend'] == 'increasing':
            insights.append({
                'type': 'trend',
                'priority': 'medium',
                'title': 'Increasing Spending Trend',
                'message': f"Your spending is trending upward",
                'recommendation': "Monitor your expenses closely to avoid overspending"
            })

        return insights

In [44]:
    def save_model(self, filepath='finance_model.pkl'):
        """Pickle the model's learned state, as a plain dict, to ``filepath``."""
        persisted_fields = ('is_trained', 'category_stats', 'monthly_stats',
                            'income_stats', 'model_weights', 'spending_patterns')
        # Snapshot the attributes in a fixed order under their own names.
        snapshot = {field: getattr(self, field) for field in persisted_fields}

        with open(filepath, 'wb') as handle:
            pickle.dump(snapshot, handle)

        print(f"\nüíæ Model saved to: {filepath}")
    
    def load_model(self, filepath='finance_model.pkl'):
        """
        Restore the learned state from a pickle file at ``filepath``.

        The file must contain a dict with the keys ``is_trained``,
        ``category_stats``, ``monthly_stats``, ``income_stats``,
        ``model_weights`` and ``spending_patterns``; a missing key raises
        ``KeyError``.

        NOTE: unpickling can execute arbitrary code, so only load files that
        come from a trusted source.
        """
        with open(filepath, 'rb') as handle:
            snapshot = pickle.load(handle)

        # Copy each saved field onto the instance, one at a time and in order.
        for field in ('is_trained', 'category_stats', 'monthly_stats',
                      'income_stats', 'model_weights', 'spending_patterns'):
            setattr(self, field, snapshot[field])

        print(f"‚úÖ Model loaded from: {filepath}")
    
    def print_summary(self):
        """
        Print a human-readable report of the trained model.

        Sections, in order: spending share per category (largest first), the
        next-month prediction, the first five budget recommendations, and the
        generated insights. Prints a notice and returns when the model has
        not been trained.
        """
        if not self.is_trained:
            print("‚ùå Model not trained yet!")
            return

        rule = "=" * 60
        print("\n" + rule, "üìä SMART FINANCE AI - MODEL SUMMARY", rule, sep="\n")

        # Category statistics, ranked by share of total spending.
        print("\nüí≥ Spending by Category:")
        for cat in sorted(self.spending_patterns, key=self.spending_patterns.get, reverse=True):
            pct = self.spending_patterns[cat]
            print(f"   {cat:15s}: {pct*100:5.1f}% (Avg: Rp {self.category_stats[cat]['mean']:,.0f})")

        # Prediction
        prediction = self.predict_next_month_spending()
        if prediction:
            print(f"\nüîÆ Next Month Prediction:")
            print(f"   Estimated spending: Rp {prediction['predicted_amount']:,.0f}")
            print(f"   Confidence: {prediction['confidence']*100:.1f}%")
            print(f"   Trend: {prediction['trend'].upper()}")

        # Budget recommendations (only the first five entries are shown).
        budget = self.recommend_budget()
        if budget:
            print(f"\nüí° Budget Recommendations:")
            for cat in list(budget)[:5]:
                rec = budget[cat]
                if rec['status'] == 'under_budget':
                    status = "‚úÖ"
                else:
                    status = "‚ö†Ô∏è"
                print(f"   {status} {cat:15s}: Rp {rec['recommended_monthly']:,.0f} "
                      f"(Current: Rp {rec['current_average']:,.0f})")

        # Insights are computed before their header is printed.
        insights = self.generate_insights()
        print(f"\nüéØ Key Insights:")
        for insight in insights:
            print(f"   [{insight['priority'].upper()}] {insight['title']}")
            print(f"      {insight['message']}")

In [45]:
# Bind the functions defined in the preceding cells to the class so they can
# be called as methods; each one is attached under its own function name.
for _method in (
    train,
    predict_next_month_spending,
    detect_anomalies,
    recommend_budget,
    generate_insights,
    save_model,
    load_model,
    print_summary,
):
    setattr(SmartFinanceAI, _method.__name__, _method)
del _method

print("‚úÖ SmartFinanceAI class defined successfully!")

‚úÖ SmartFinanceAI class defined successfully!


In [46]:
model = SmartFinanceAI()

# Load the transactions CSV from the Kaggle input directory.
data = model.load_data('/kaggle/input/smart-personal-finance-analyst-dataset/financial_transactions.csv')

# Preview the first rows and the column summary.
print("\nüìã Data Preview:")
print(data.head())
print("\nüìä Data Info:")
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() appended a stray "None" line.
data.info()

Loading data from /kaggle/input/smart-personal-finance-analyst-dataset/financial_transactions.csv...
Loaded 1000 transactions

üìã Data Preview:
  transaction_id       date   time     type   category       amount  \
0       TRX00970 2024-11-22  06:43  expense     Health    623878.35   
1       TRX00618 2024-11-22  06:56   income     Salary   3370294.85   
2       TRX00455 2024-11-22  02:03  expense  Education   4398739.31   
3       TRX00702 2024-11-23  18:25   income   Business  10914129.22   
4       TRX00195 2024-11-25  12:33   income   Business  11198206.28   

                               merchant                         description  \
0                            PD Permata              Animi magnam suscipit.   
1                 CV Hidayat Kusumo Tbk                     Income received   
2               PT Wijaya (Persero) Tbk  Similique impedit blanditiis vero.   
3  CV Handayani Salahudin (Persero) Tbk                     Income received   
4           UD Firgantoro (Perse

In [47]:
# Fit the model on the loaded transactions. train() returns the model itself,
# so the notebook echoes the object's repr as this cell's result.
model.train(data)


TRAINING SMART FINANCE AI MODEL

Training Data Overview:
   Total transactions: 1000
   Expenses: 810
   Income: 190
   Date range: 2024-11-22 00:00:00 to 2025-11-16 00:00:00

Analyzing spending patterns...
Analyzing monthly trends...
Analyzing income patterns...
Training prediction model...

Model training completed!
   Categories trained: 8
   Months analyzed: 13


<__main__.SmartFinanceAI at 0x79bca26a1a10>

In [48]:
# Print the full report: category shares, next-month prediction, the first
# five budget recommendations and the generated insights.
model.print_summary()


üìä SMART FINANCE AI - MODEL SUMMARY

üí≥ Spending by Category:
   Investment     :  44.9% (Avg: Rp 5,685,938)
   Education      :  18.5% (Avg: Rp 2,689,807)
   Health         :  13.0% (Avg: Rp 1,545,819)
   Shopping       :   8.8% (Avg: Rp 1,063,285)
   Bills          :   7.4% (Avg: Rp 875,684)
   Entertainment  :   3.9% (Avg: Rp 508,129)
   Food           :   2.4% (Avg: Rp 264,426)
   Transport      :   1.1% (Avg: Rp 150,009)

üîÆ Next Month Prediction:
   Estimated spending: Rp 116,094,885
   Confidence: 71.6%
   Trend: INCREASING

üí° Budget Recommendations:
   ‚úÖ Health         : Rp 35,867,010 (Current: Rp 12,604,373)
   ‚ö†Ô∏è Education      : Rp 7,708,387 (Current: Rp 18,001,015)
   ‚úÖ Entertainment  : Rp 12,058,409 (Current: Rp 3,752,340)
   ‚úÖ Food           : Rp 6,598,404 (Current: Rp 2,318,809)
   ‚úÖ Transport      : Rp 3,119,399 (Current: Rp 1,096,218)

üéØ Key Insights:
   [HIGH] Top Spending Category
      'Investment' accounts for 44.9% of your total expenses
 

In [49]:
# Ask the trained model for its next-month forecast and print it as a short report.
prediction = model.predict_next_month_spending()

print("\n" + "="*60, "üîÆ SPENDING PREDICTION FOR NEXT MONTH", "="*60, sep="\n")

# One line per field of the forecast dict.
print(f"Predicted Amount: Rp {prediction['predicted_amount']:,.2f}")
print(f"Trend: {prediction['trend'].upper()}")
print(f"Confidence: {prediction['confidence']*100:.1f}%")
print(f"Expected Range: Rp {prediction['range']['min']:,.0f} - Rp {prediction['range']['max']:,.0f}")



üîÆ SPENDING PREDICTION FOR NEXT MONTH
Predicted Amount: Rp 116,094,885.22
Trend: INCREASING
Confidence: 71.6%
Expected Range: Rp 83,089,189 - Rp 149,100,581


In [50]:
print("\n" + "="*60, "üö® ANOMALY DETECTION", "="*60, sep="\n")

# Score the same transactions the model was trained on.
anomalies = model.detect_anomalies(data, threshold=2.5)

if not anomalies:
    print("\n‚úÖ No anomalies detected. All transactions appear normal.")
else:
    print(f"\nFound {len(anomalies)} unusual transactions:\n")
    # Show at most the ten most extreme entries, numbered from 1.
    for i, anom in enumerate(anomalies[:10], start=1):
        print(f"{i}. [{anom['severity'].upper()}] {anom['date']}")
        print(f"   {anom['message']}")
        print(f"   Z-score: {anom['z_score']:.2f}", end="\n\n")


üö® ANOMALY DETECTION

‚úÖ No anomalies detected. All transactions appear normal.


In [51]:
print("\n" + "="*60, "üí° BUDGET RECOMMENDATIONS", "="*60, sep="\n")

budget = model.recommend_budget()

# Walk the categories from the largest recommended allocation to the smallest.
for category in sorted(budget, key=lambda name: budget[name]['recommended_monthly'], reverse=True):
    rec = budget[category]
    if rec['status'] == 'under_budget':
        status = "‚úÖ Under Budget"
    else:
        status = "‚ö†Ô∏è Over Budget"
    print(f"\n{category}:")
    print(f"  Status: {status}")
    print(f"  Recommended: Rp {rec['recommended_monthly']:,.0f}/month")
    print(f"  Current Avg: Rp {rec['current_average']:,.0f}/month")
    print(f"  Difference: Rp {abs(rec['difference']):,.0f} {'savings potential' if rec['status'] == 'over_budget' else 'safe to spend more'}")



üí° BUDGET RECOMMENDATIONS

Health:
  Status: ‚úÖ Under Budget
  Recommended: Rp 35,867,010/month
  Current Avg: Rp 12,604,373/month
  Difference: Rp 23,262,637 safe to spend more

Shopping:
  Status: ‚úÖ Under Budget
  Recommended: Rp 27,598,364/month
  Current Avg: Rp 8,588,068/month
  Difference: Rp 19,010,296 safe to spend more

Bills:
  Status: ‚úÖ Under Budget
  Recommended: Rp 20,509,809/month
  Current Avg: Rp 7,207,550/month
  Difference: Rp 13,302,259 safe to spend more

Investment:
  Status: ‚ö†Ô∏è Over Budget
  Recommended: Rp 18,729,462/month
  Current Avg: Rp 43,737,985/month
  Difference: Rp 25,008,524 savings potential

Entertainment:
  Status: ‚úÖ Under Budget
  Recommended: Rp 12,058,409/month
  Current Avg: Rp 3,752,340/month
  Difference: Rp 8,306,070 safe to spend more

Education:
  Status: ‚ö†Ô∏è Over Budget
  Recommended: Rp 7,708,387/month
  Current Avg: Rp 18,001,015/month
  Difference: Rp 10,292,628 savings potential

Food:
  Status: ‚úÖ Under Budget
  Recom

In [52]:
print("\n" + "="*60, "üéØ ACTIONABLE INSIGHTS", "="*60, sep="\n")

insights = model.generate_insights()

# Numbered list: priority and title first, then the message and the advice.
for i, insight in enumerate(insights, start=1):
    print(f"\n{i}. [{insight['priority'].upper()}] {insight['title']}")
    print(f"   üí¨ {insight['message']}")
    print(f"   üí° {insight['recommendation']}")



üéØ ACTIONABLE INSIGHTS

1. [HIGH] Top Spending Category
   üí¨ 'Investment' accounts for 44.9% of your total expenses
   üí° Review your Investment transactions for potential savings opportunities

2. [LOW] Good Savings Rate
   üí¨ Great job! Your savings rate is 26.4%
   üí° Maintain this healthy financial habit

3. [MEDIUM] Increasing Spending Trend
   üí¨ Your spending is trending upward
   üí° Monitor your expenses closely to avoid overspending


In [53]:
# Persist the trained state to the working directory, then print a closing banner.
model.save_model('finance_model.pkl')

print("\n" + "="*60, "‚úÖ MODEL TRAINING & ANALYSIS COMPLETED!", "="*60, sep="\n")


üíæ Model saved to: finance_model.pkl

‚úÖ MODEL TRAINING & ANALYSIS COMPLETED!
