# SmartCoins Data Analysis Portfolio

**Author:** Bienvenu Mwenyemali  
**Data Source:** [SmartCoins App API](https://smartcoinsapp.com/api/coins)  
**Skills Demonstrated:** Python, Pandas, API Integration, Data Analysis, Visualization

---

## Project Overview
This notebook demonstrates comprehensive data analytics skills using cryptocurrency data from SmartCoins App. We will:
1. Extract data from API
2. Clean and transform data
3. Perform statistical analysis
4. Create custom scoring algorithms
5. Detect outliers
6. Generate visualizations
7. Export data for further analysis

## Section 1: Setup and Imports

In [1]:
# Install required packages (uncomment if needed)
# !pip install pandas numpy matplotlib seaborn requests scipy

In [None]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import requests
import json
from datetime import datetime
import warnings
import sqlite3
import os

# Suppress warnings for cleaner output
# NOTE: this silences every warning category for the rest of the session,
# including deprecation notices from pandas/matplotlib.
warnings.filterwarnings('ignore')
# Apply matplotlib's 'ggplot' style sheet to all figures created below.
plt.style.use('ggplot')

print("Libraries imported successfully!")
print(f"Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

AttributeError: module 'matplotlib.cm' has no attribute 'register_cmap'

## Section 2: Data Extraction from API

In [None]:
# API Configuration
# Endpoint passed to extract_from_api() to download the coin records.
API_URL = "https://smartcoinsapp.com/api/coins"

def extract_from_api(url):
    """Fetch the coin list from the SmartCoins API.

    Args:
        url: Endpoint to GET; expected to respond with JSON.

    Returns:
        A list of coin records. A top-level JSON list is returned as-is. For a
        JSON object, the value under 'data' (or else 'coins') is used; if
        neither key exists the object itself is wrapped in a one-element list.
        Returns None when the request fails, the body is not valid JSON, or
        the payload does not resolve to a list.
    """
    print(f"Fetching data from: {url}")

    # Keep the try body to the calls that can actually fail on I/O or parsing.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors raised by older requests versions.
        print(f"Error fetching data: {e}")
        return None

    if isinstance(data, dict):
        coins = data.get('data', data.get('coins', [data]))
    else:
        coins = data

    # Anything other than a list (null, string, nested object, ...) cannot be
    # iterated as coin records, so report it instead of passing it downstream.
    if not isinstance(coins, list):
        print(f"Error fetching data: unexpected payload type {type(coins).__name__}")
        return None

    print(f"Successfully extracted {len(coins)} coins")
    return coins

# Extract data
# raw_data is None when the fetch failed, hence the guard around len().
raw_data = extract_from_api(API_URL)
print(f"\nTotal coins retrieved: {len(raw_data) if raw_data else 0}")

In [None]:
# View sample of raw data structure
if raw_data:
    print("Sample coin data structure:")
    # Pretty-print the first record only, truncated to 1500 characters.
    print(json.dumps(raw_data[0], indent=2)[:1500])

## Section 3: Data Transformation

In [None]:
def transform_to_dataframe(coins_data):
    """Flatten raw coin records into a DataFrame with one row per coin.

    Args:
        coins_data: Iterable of coin dicts as returned by the API.

    Returns:
        pandas.DataFrame with one row per coin. Numeric fields that are absent
        from a record default to 0 (``max_supply`` and the two date fields are
        left as None), and ``platform`` defaults to 'Native'.
    """
    def nested(parent, key):
        # A nested object may be absent or an explicit JSON null; fall back to
        # an empty dict either way so the .get() lookups below keep working.
        value = parent.get(key)
        return value if isinstance(value, dict) else {}

    records = []

    for coin in coins_data:
        # Extract nested data
        quote = nested(nested(coin, 'quote'), 'USD')
        signals = nested(coin, 'signals')
        scores = nested(coin, 'investmentScores')
        risk = nested(coin, 'riskMetrics')
        network = nested(coin, 'networkMetrics')
        supply = nested(coin, 'supplyMetrics')
        platform = nested(coin, 'platform')

        record = {
            # Basic Info
            'coin_name': coin.get('name'),
            'symbol': coin.get('symbol'),
            'price_usd': quote.get('price', 0),
            'market_cap': quote.get('market_cap', 0),
            'volume_24h': quote.get('volume_24h', 0),
            'volume_change_24h': quote.get('volume_change_24h', 0),

            # Price Changes
            'pct_change_1h': quote.get('percent_change_1h', 0),
            'pct_change_24h': quote.get('percent_change_24h', 0),
            'pct_change_7d': quote.get('percent_change_7d', 0),
            'pct_change_30d': quote.get('percent_change_30d', 0),
            'pct_change_60d': quote.get('percent_change_60d', 0),
            'pct_change_90d': quote.get('percent_change_90d', 0),

            # Classification
            'coin_type': coin.get('coinType', 'Unknown'),
            # No platform object (or one without a name) means 'Native'.
            'platform': platform.get('name', 'Native'),
            'category': coin.get('category', 'Uncategorized'),

            # Signals
            'primary_signal': signals.get('primarySignal', 'NEUTRAL'),
            'signal_strength': signals.get('signalStrength', 0),
            'overall_score': signals.get('overallScore', 0),
            'composite_score': signals.get('compositeScore', 0),

            # Momentum
            'change_momentum': signals.get('changeMomentum', 0),
            'momentum_acceleration': signals.get('momentumAcceleration', 0),
            'risk_adjusted_momentum': signals.get('riskAdjustedMomentum', 0),

            # Risk Metrics
            'price_volatility': risk.get('priceVolatility', 0),
            'volatility_risk': risk.get('volatilityRisk', 0),
            'liquidity_risk': risk.get('liquidityRisk', 0),
            'concentration_risk': risk.get('concentrationRisk', 0),

            # Network Metrics
            'nvt_score': network.get('nvtScore', 0),
            'mvrv_score': network.get('mvrvScore', 0),
            'scarcity_score': network.get('scarcityScore', 0),
            'efficiency_score': network.get('efficiencyScore', 0),
            'momentum_consistency': network.get('momentumConsistency', 0),

            # Investment Scores
            'inv_momentum_score': scores.get('momentumScore', 0),
            'inv_value_score': scores.get('valueScore', 0),
            'inv_risk_score': scores.get('riskScore', 0),
            'inv_activity_score': scores.get('activityScore', 0),
            'inv_network_score': scores.get('networkScore', 0),

            # Dates
            'date_added': coin.get('date_added'),
            'last_updated': coin.get('last_updated'),

            # Supply Metrics
            'max_supply': coin.get('max_supply'),
            'circulating_supply': coin.get('circulating_supply', 0),
            'total_supply': coin.get('total_supply', 0),
            'annual_inflation': supply.get('annualInflation', 0),
            'stock_to_flow': supply.get('stockToFlow', 0),
        }
        records.append(record)

    return pd.DataFrame(records)

# Create DataFrame
# Stop here with a clear message when nothing was downloaded: the later cells
# index columns that only exist once at least one coin record was transformed.
if not raw_data:
    raise RuntimeError("No coin data retrieved from the API; cannot build the DataFrame")
df = transform_to_dataframe(raw_data)
print(f"DataFrame created with {len(df)} rows and {len(df.columns)} columns")

## Section 4: Data Exploration

In [None]:
# DataFrame Shape and Info
# Report the row/column counts, then list every column name.
print(f"Shape: {df.shape[0]} rows, {df.shape[1]} columns")
print("\nColumn Names:")
print(df.columns.tolist())

In [None]:
# Data Types
print("Data Types:")
# Bare expression: shown as the cell's output when run in a notebook.
df.dtypes

In [None]:
# First 10 rows
# Bare expression: shown as the cell's output when run in a notebook.
df.head(10)

In [None]:
# Basic Statistics
# Summary statistics (count, mean, std, quartiles, ...) from DataFrame.describe().
df.describe()

In [None]:
# Missing Values Analysis
# Null count per column, and the same count as a percentage of all rows.
missing = df.isna().sum()
missing_pct = (100 * (missing / len(df))).round(2)
missing_df = pd.DataFrame(
    {'Missing Count': missing, 'Missing %': missing_pct}
)
print("Missing Values:")
# Only columns that actually contain nulls are shown.
missing_df.loc[missing_df['Missing Count'] > 0]

## Section 5: Data Cleaning

In [None]:
# Work on a copy so the raw DataFrame stays untouched
df_clean = df.copy()

# Null max_supply values are stored as 0
df_clean['max_supply'] = df_clean['max_supply'].fillna(0)
print("Filled missing max_supply values with 0")

# Parse the timestamp columns; values that cannot be parsed become NaT
for col in ('date_added', 'last_updated'):
    if col not in df_clean.columns:
        continue
    df_clean[col] = pd.to_datetime(df_clean[col], errors='coerce')
    print(f"Converted {col} to datetime")

# Keep only the first row seen for each symbol
initial_rows = len(df_clean)
df_clean = df_clean.drop_duplicates(subset=['symbol'], keep='first')
print(f"Removed {initial_rows - len(df_clean)} duplicate records")

print(f"\nCleaned DataFrame: {len(df_clean)} rows")

In [None]:
# Create Derived Features

# Price Tier Classification
def classify_price(price):
    """Bucket a price into a coarse tier.

    Args:
        price: Numeric price; may be None/NaN when no value was available.

    Returns:
        'Micro' (< 0.001), 'Low' (< 1), 'Medium' (< 100), 'High' (>= 100),
        or 'Unknown' when the price is missing.
    """
    # NaN compares False against every bound, so without this guard a missing
    # price would fall through to the last branch and be labelled 'High'.
    if pd.isna(price):
        return 'Unknown'
    if price < 0.001:
        return 'Micro'
    elif price < 1:
        return 'Low'
    elif price < 100:
        return 'Medium'
    else:
        return 'High'

# Label every row with the tier derived from its price_usd value.
df_clean['price_tier'] = df_clean['price_usd'].apply(classify_price)
print("Created price_tier classification")

# Momentum Category
def classify_momentum(momentum):
    """Map a momentum value to a directional label.

    Args:
        momentum: Numeric momentum; may be None/NaN when no value was available.

    Returns:
        'Strong Bullish' (> 1), 'Bullish' (> 0), 'Bearish' (> -1),
        'Strong Bearish' (<= -1), or 'Unknown' when the value is missing.
    """
    # NaN fails every comparison, so without this guard a missing value would
    # reach the final branch and be labelled 'Strong Bearish'.
    if pd.isna(momentum):
        return 'Unknown'
    if momentum > 1:
        return 'Strong Bullish'
    elif momentum > 0:
        return 'Bullish'
    elif momentum > -1:
        return 'Bearish'
    else:
        return 'Strong Bearish'

# Label every row with the category derived from its change_momentum value.
df_clean['momentum_category'] = df_clean['change_momentum'].apply(classify_momentum)
print("Created momentum_category classification")

# Risk Level Classification
def classify_risk(volatility_risk):
    """Map a volatility-risk value to a risk level.

    Args:
        volatility_risk: Numeric risk value; may be None/NaN when missing.

    Returns:
        'Low Risk' (< 0.5), 'Medium Risk' (< 2), 'High Risk' (>= 2),
        or 'Unknown' when the value is missing.
    """
    # NaN fails both comparisons, so without this guard a missing value would
    # be reported as 'High Risk'.
    if pd.isna(volatility_risk):
        return 'Unknown'
    if volatility_risk < 0.5:
        return 'Low Risk'
    elif volatility_risk < 2:
        return 'Medium Risk'
    else:
        return 'High Risk'

# Label every row with the level derived from its volatility_risk value.
df_clean['risk_level'] = df_clean['volatility_risk'].apply(classify_risk)
print("Created risk_level classification")

print(f"\nNew columns: price_tier, momentum_category, risk_level")

## Section 6: Statistical Analysis

In [None]:
# Descriptive Statistics for Key Metrics
key_metrics = ['price_usd', 'volume_24h', 'overall_score', 'composite_score', 
               'change_momentum', 'price_volatility', 'volatility_risk']

# Restrict to the metrics that exist in df_clean so the selection cannot raise KeyError.
available_metrics = [m for m in key_metrics if m in df_clean.columns]
df_clean[available_metrics].describe()

In [None]:
# Correlation Analysis
score_columns = ['overall_score', 'composite_score', 'change_momentum', 
                 'price_volatility', 'efficiency_score', 'inv_momentum_score']
# Restrict to the columns that exist in df_clean so the selection cannot raise KeyError.
available_cols = [c for c in score_columns if c in df_clean.columns]

# Pairwise correlation between the selected columns (DataFrame.corr defaults).
corr_matrix = df_clean[available_cols].corr()
print("Correlation Matrix:")
corr_matrix.round(3)

In [None]:
# Top Correlations
# Walk the upper triangle of the matrix so each unordered pair appears once;
# the absolute value is stored, so the ranking reflects strength, not sign.
corr_pairs = [
    {
        'Pair': f"{left} <-> {right}",
        'Correlation': abs(corr_matrix.iloc[i, j]),
    }
    for i, left in enumerate(corr_matrix.columns)
    for j, right in enumerate(corr_matrix.columns)
    if j > i
]

corr_df = pd.DataFrame(corr_pairs).sort_values('Correlation', ascending=False)
print("Top 10 Strongest Correlations:")
corr_df.head(10)

## Section 7: Custom Scoring Functions

In [None]:
def calculate_momentum_score(row):
    """Blend the percentage changes of several timeframes into one score.

    Each available, non-null change is squashed with ``tanh(x / 50) * 50``
    (bounding it to the open interval (-50, 50)) and then weighted; the
    weighted contributions are added up and rounded to two decimals.
    """
    timeframe_weights = (
        ('pct_change_1h', 0.05),
        ('pct_change_24h', 0.15),
        ('pct_change_7d', 0.25),
        ('pct_change_30d', 0.30),
        ('pct_change_90d', 0.25),
    )

    total = 0
    for field, share in timeframe_weights:
        # Skip timeframes that are absent from the row or hold a null.
        if field not in row.index or not pd.notna(row[field]):
            continue
        squashed = np.tanh(row[field] / 50) * 50
        total += squashed * share

    return round(total, 2)

def calculate_risk_score(row):
    """Combine the individual risk metrics into one weighted score.

    Every available, non-null metric is divided by 10, capped at 10, and
    multiplied by its weight; the sum is rounded to two decimals. A lower
    score means lower risk.
    """
    weighting = {
        'volatility_risk': 0.35,
        'liquidity_risk': 0.30,
        'concentration_risk': 0.20,
        'price_volatility': 0.15,
    }

    total = 0
    for metric, share in weighting.items():
        # Skip metrics that are absent from the row or hold a null.
        if metric not in row.index or not pd.notna(row[metric]):
            continue
        scaled = min(row[metric] / 10, 10)
        total += scaled * share

    return round(total, 2)

def calculate_investment_score(row):
    """Combine momentum, risk and overall score into one investment score.

    Reads 'momentum_score' (default 0), 'risk_score' (default 5) and
    'overall_score' (default 50) from ``row`` and maps them onto 40, 30 and
    30 points respectively; the sum is rounded to two decimals.
    """
    momentum_points = (row.get('momentum_score', 0) + 50) / 100 * 40

    # Risk is inverted (lower risk earns more points) and capped at 10 first.
    capped_risk = min(row.get('risk_score', 5), 10)
    risk_points = (10 - capped_risk) / 10 * 30

    overall_points = row.get('overall_score', 50) / 100 * 30

    return round(momentum_points + risk_points + overall_points, 2)

# Apply scoring functions
# Order matters: calculate_investment_score reads the momentum_score and
# risk_score columns, so those two must be populated first.
df_clean['momentum_score'] = df_clean.apply(calculate_momentum_score, axis=1)
df_clean['risk_score'] = df_clean.apply(calculate_risk_score, axis=1)
df_clean['investment_score'] = df_clean.apply(calculate_investment_score, axis=1)

print("Custom scores calculated:")
print("- momentum_score: Based on multi-timeframe price changes")
print("- risk_score: Based on volatility and liquidity risks")
print("- investment_score: Composite score for investment decisions")

In [None]:
# Signal Prediction Function
def predict_signal(row):
    """Turn the custom scores of one row into a trading-signal label.

    Momentum contributes -2..+2 points, risk -1..+1 and the investment score
    -1..+1. The total maps to 'STRONG BUY' (>= 3), 'BUY' (>= 1),
    'STRONG SELL' (<= -3), 'SELL' (<= -1) or 'HOLD' (0).
    """
    momentum = row.get('momentum_score', 0)
    risk = row.get('risk_score', 5)
    investment = row.get('investment_score', 50)

    # Momentum contribution
    if momentum > 20:
        momentum_points = 2
    elif momentum > 0:
        momentum_points = 1
    elif momentum < -20:
        momentum_points = -2
    elif momentum < 0:
        momentum_points = -1
    else:
        momentum_points = 0

    # Risk contribution: low risk is rewarded, high risk penalised
    if risk < 2:
        risk_points = 1
    elif risk > 5:
        risk_points = -1
    else:
        risk_points = 0

    # Investment score contribution
    if investment > 60:
        investment_points = 1
    elif investment < 40:
        investment_points = -1
    else:
        investment_points = 0

    total = momentum_points + risk_points + investment_points

    # Determine signal
    if total >= 3:
        return 'STRONG BUY'
    if total >= 1:
        return 'BUY'
    if total <= -3:
        return 'STRONG SELL'
    if total <= -1:
        return 'SELL'
    return 'HOLD'

# Derive a signal label for every row from its custom scores.
df_clean['predicted_signal'] = df_clean.apply(predict_signal, axis=1)

print("Signal Distribution:")
df_clean['predicted_signal'].value_counts()

## Section 8: Outlier Detection

In [None]:
def detect_outliers_zscore(df, column, threshold=3):
    """Flag rows whose absolute Z-score in ``column`` exceeds ``threshold``.

    Args:
        df: DataFrame to inspect.
        column: Name of the column to score.
        threshold: Absolute Z-score above which a value counts as an outlier.

    Returns:
        Boolean Series indexed like ``df``; all False when ``column`` is absent.
    """
    if column not in df.columns:
        # Build the mask on df.index: a fresh 0..n-1 index would misalign when
        # the mask is assigned back onto a frame whose rows were filtered.
        return pd.Series(False, index=df.index, dtype=bool)

    # Nulls are scored as 0 so that every row receives a Z-score.
    z_scores = np.abs(stats.zscore(df[column].fillna(0)))
    # Wrap the result so both code paths return the same index-aligned type.
    return pd.Series(np.asarray(z_scores) > threshold, index=df.index)

def detect_outliers_iqr(df, column):
    """Flag rows lying more than 1.5 * IQR outside the quartiles of ``column``.

    Args:
        df: DataFrame to inspect.
        column: Name of the column to test.

    Returns:
        Boolean Series indexed like ``df``; all False when ``column`` is absent.
    """
    if column not in df.columns:
        # Build the mask on df.index: a fresh 0..n-1 index would misalign when
        # the mask is assigned back onto a frame whose rows were filtered.
        return pd.Series(False, index=df.index, dtype=bool)

    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    return (df[column] < lower_bound) | (df[column] > upper_bound)

# Columns screened for outliers with both methods
outlier_columns = ['price_usd', 'volume_24h', 'price_volatility', 'investment_score']

print("Outlier Detection Results:")
print("=" * 50)

for col in outlier_columns:
    # Columns missing from the cleaned frame are skipped silently.
    if col not in df_clean.columns:
        continue

    zscore_outliers = detect_outliers_zscore(df_clean, col)
    iqr_outliers = detect_outliers_iqr(df_clean, col)

    # Persist both masks as flag columns for the later plots and exports.
    df_clean[f'{col}_outlier_zscore'] = zscore_outliers
    df_clean[f'{col}_outlier_iqr'] = iqr_outliers

    print(f"\n{col}:")
    print(f"  Z-score outliers: {zscore_outliers.sum()}")
    print(f"  IQR outliers: {iqr_outliers.sum()}")

## Section 9: Top N Analysis

In [None]:
# Top 10 by Investment Score
# nlargest picks the 10 highest scores; the column list trims the display.
top_investment = df_clean.nlargest(10, 'investment_score')[[
    'coin_name', 'symbol', 'investment_score', 'momentum_score', 
    'risk_score', 'predicted_signal'
]]
print("TOP 10 COINS BY INVESTMENT SCORE")
print("=" * 60)
top_investment

In [None]:
# Top 10 Lowest Risk
# nsmallest picks the 10 lowest risk_score values.
top_low_risk = df_clean.nsmallest(10, 'risk_score')[[
    'coin_name', 'symbol', 'risk_score', 'risk_level'
]]
print("TOP 10 LOWEST RISK COINS")
print("=" * 60)
top_low_risk

In [None]:
# Top 10 by Momentum
# Ranked by the custom momentum_score; change_momentum is shown for comparison.
top_momentum = df_clean.nlargest(10, 'momentum_score')[[
    'coin_name', 'symbol', 'momentum_score', 'change_momentum'
]]
print("TOP 10 COINS BY MOMENTUM")
print("=" * 60)
top_momentum

In [None]:
# Distribution Analysis
print("DISTRIBUTION ANALYSIS")
print("=" * 60)

# One frequency table per derived categorical column.
for heading, column in (
    ("Price Tier", 'price_tier'),
    ("Risk Level", 'risk_level'),
    ("Signal", 'predicted_signal'),
):
    print(f"\n{heading} Distribution:")
    print(df_clean[column].value_counts())

## Section 10: Data Visualization

In [None]:
# 1. Score Distributions
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# (axes, column, display name, bar colour) for each of the four panels
histogram_panels = [
    (axes[0, 0], 'investment_score', 'Investment Score', 'steelblue'),
    (axes[0, 1], 'momentum_score', 'Momentum Score', 'forestgreen'),
    (axes[1, 0], 'risk_score', 'Risk Score', 'coral'),
    (axes[1, 1], 'overall_score', 'Overall Score', 'purple'),
]

for panel, column, title, fill in histogram_panels:
    panel.hist(df_clean[column].dropna(), bins=20, color=fill, edgecolor='white')
    panel.set_title(f'{title} Distribution', fontweight='bold')
    panel.set_xlabel(title)
    panel.set_ylabel('Frequency')
    # Dashed red line marks the column mean.
    panel.axvline(df_clean[column].mean(), color='red', linestyle='--', label='Mean')
    panel.legend()

plt.tight_layout()
plt.show()

In [None]:
# 2. Scatter Plot for Outlier Detection
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Investment vs Risk Score
# Points flagged by the IQR test are drawn red; if the flag column does not
# exist, every point falls back to the default colour.
colors = ['red' if x else 'steelblue' for x in df_clean.get('investment_score_outlier_iqr', [False]*len(df_clean))]
axes[0].scatter(df_clean['risk_score'], df_clean['investment_score'], 
                c=colors, alpha=0.6, s=50)
axes[0].set_xlabel('Risk Score', fontsize=12)
axes[0].set_ylabel('Investment Score', fontsize=12)
axes[0].set_title('Investment Score vs Risk Score\n(Red = Outliers)', fontweight='bold')
axes[0].grid(True, alpha=0.3)

# Volume vs Price (log scale)
# Only strictly positive values are kept, so log10 is well defined and no
# "+ 1" offset is needed. Adding one would squash every sub-dollar price
# towards zero and make the plotted values disagree with the axis labels.
df_positive = df_clean[(df_clean['volume_24h'] > 0) & (df_clean['price_usd'] > 0)]
colors2 = ['red' if x else 'forestgreen' for x in df_positive.get('volume_24h_outlier_iqr', [False]*len(df_positive))]
axes[1].scatter(np.log10(df_positive['price_usd']),
                np.log10(df_positive['volume_24h']),
                c=colors2, alpha=0.6, s=50)
axes[1].set_xlabel('Log10(Price USD)', fontsize=12)
axes[1].set_ylabel('Log10(Volume 24h)', fontsize=12)
axes[1].set_title('Volume vs Price (Log Scale)\n(Red = Outliers)', fontweight='bold')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# 3. Top 10 Coins Bar Chart
top10 = df_clean.nlargest(10, 'investment_score')

fig, ax = plt.subplots(figsize=(12, 6))

x = range(len(top10))
width = 0.35

# Two bars per coin, placed half a bar width either side of the tick.
bars1 = ax.bar(
    [pos - width/2 for pos in x],
    top10['investment_score'],
    width,
    label='Investment Score',
    color='steelblue',
)
bars2 = ax.bar(
    [pos + width/2 for pos in x],
    top10['momentum_score'],
    width,
    label='Momentum Score',
    color='coral',
)

ax.set_title('Top 10 Coins: Investment vs Momentum Scores', fontweight='bold', fontsize=14)
ax.set_xlabel('Coin', fontsize=12)
ax.set_ylabel('Score', fontsize=12)
ax.set_xticks(x)
ax.set_xticklabels(top10['symbol'], rotation=45, ha='right')
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

In [None]:
# 4. Correlation Heatmap
score_cols = ['investment_score', 'momentum_score', 'risk_score', 'overall_score',
              'composite_score', 'change_momentum', 'price_volatility']
# Plot only the metrics that are present in the cleaned frame.
available = [c for c in score_cols if c in df_clean.columns]

fig, ax = plt.subplots(figsize=(10, 8))
corr = df_clean[available].corr()

# Draw the matrix as an image with a fixed -1..1 colour range
im = ax.imshow(corr, cmap='RdYlGn', vmin=-1, vmax=1)
plt.colorbar(im, ax=ax)

tick_positions = range(len(available))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(available, rotation=45, ha='right')
ax.set_yticklabels(available)

# Write each coefficient into its cell (x is the column, y is the row)
for i in tick_positions:
    for j in tick_positions:
        ax.text(j, i, f'{corr.iat[i, j]:.2f}', ha='center', va='center', fontsize=9)

ax.set_title('Correlation Heatmap of Key Metrics', fontweight='bold', fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
# 5. Risk Level Pie Chart
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# value_counts() orders categories by frequency, so colours are looked up by
# label rather than by position; otherwise e.g. 'High Risk' could be drawn
# green simply because it is the most common level.
fallback_color = '#95a5a6'  # grey for any label without a dedicated colour

# Risk Level
risk_palette = {
    'Low Risk': '#2ecc71',
    'Medium Risk': '#f39c12',
    'High Risk': '#e74c3c',
}
risk_counts = df_clean['risk_level'].value_counts()
colors1 = [risk_palette.get(level, fallback_color) for level in risk_counts.index]
axes[0].pie(risk_counts.values, labels=risk_counts.index, autopct='%1.1f%%',
            colors=colors1, startangle=90)
axes[0].set_title('Risk Level Distribution', fontweight='bold', fontsize=14)

# Signal Distribution
signal_palette = {
    'STRONG BUY': '#27ae60',
    'BUY': '#2ecc71',
    'HOLD': '#f39c12',
    'SELL': '#e74c3c',
    'STRONG SELL': '#c0392b',
}
signal_counts = df_clean['predicted_signal'].value_counts()
colors2 = [signal_palette.get(signal, fallback_color) for signal in signal_counts.index]
axes[1].pie(signal_counts.values, labels=signal_counts.index, autopct='%1.1f%%',
            colors=colors2, startangle=90)
axes[1].set_title('Predicted Signal Distribution', fontweight='bold', fontsize=14)

plt.tight_layout()
plt.show()

In [None]:
# 6. Box Plots for Score Comparison
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# In every panel an empty group is dropped together with its label and colour,
# so the tick labels always match the boxes that are actually drawn. Filtering
# only the data would leave more labels than boxes.

# Investment Score by Risk Level
risk_colors = {'Low Risk': '#2ecc71', 'Medium Risk': '#f39c12', 'High Risk': '#e74c3c'}
inv_groups = {
    level: df_clean[df_clean['risk_level'] == level]['investment_score'].dropna()
    for level in risk_colors
}
inv_groups = {level: values for level, values in inv_groups.items() if len(values) > 0}
data_inv = list(inv_groups.values())
bp1 = axes[0].boxplot(data_inv, patch_artist=True)
axes[0].set_xticklabels(list(inv_groups))
axes[0].set_ylabel('Investment Score')
axes[0].set_title('Investment Score by Risk Level', fontweight='bold')
for patch, level in zip(bp1['boxes'], inv_groups):
    patch.set_facecolor(risk_colors[level])

# Momentum Score by Price Tier
tier_colors = {'Micro': '#3498db', 'Low': '#9b59b6', 'Medium': '#e67e22', 'High': '#1abc9c'}
mom_groups = {
    tier: df_clean[df_clean['price_tier'] == tier]['momentum_score'].dropna()
    for tier in tier_colors
}
mom_groups = {tier: values for tier, values in mom_groups.items() if len(values) > 0}
data_mom = list(mom_groups.values())
bp2 = axes[1].boxplot(data_mom, patch_artist=True)
axes[1].set_xticklabels(list(mom_groups))
axes[1].set_ylabel('Momentum Score')
axes[1].set_title('Momentum Score by Price Tier', fontweight='bold')
for patch, tier in zip(bp2['boxes'], mom_groups):
    patch.set_facecolor(tier_colors[tier])

# Risk Score by Signal
risk_groups = {
    sig: df_clean[df_clean['predicted_signal'] == sig]['risk_score'].dropna()
    for sig in df_clean['predicted_signal'].unique()
}
risk_groups = {sig: values for sig, values in risk_groups.items() if len(values) > 0}
signals = list(risk_groups)
data_risk = list(risk_groups.values())
bp3 = axes[2].boxplot(data_risk, patch_artist=True)
axes[2].set_xticklabels(signals, rotation=45, ha='right')
axes[2].set_ylabel('Risk Score')
axes[2].set_title('Risk Score by Signal', fontweight='bold')

plt.tight_layout()
plt.show()

## Section 11: Export Data

In [None]:
# Create output directories
os.makedirs('output/data', exist_ok=True)
os.makedirs('output/reports', exist_ok=True)

# Export to CSV
df_clean.to_csv('output/data/smartcoins_analyzed.csv', index=False)
print("Exported: output/data/smartcoins_analyzed.csv")

# Export Top 50
top50 = df_clean.nlargest(50, 'investment_score')
top50.to_csv('output/data/top_coins.csv', index=False)
print("Exported: output/data/top_coins.csv")

# Export to SQLite for SQL analysis
conn = sqlite3.connect('output/data/smartcoins.db')
try:
    # Overwrites any existing 'coins' table with the current analysis.
    df_clean.to_sql('coins', conn, if_exists='replace', index=False)
finally:
    # Release the database handle even when the write fails.
    conn.close()
print("Exported: output/data/smartcoins.db")

## Section 12: Summary Report

In [None]:
# Reusable separator lines for the report layout
banner = "=" * 70
rule = "-" * 50

print(banner)
print("SMARTCOINS ANALYSIS SUMMARY REPORT")
print(banner)

print(f"\nTotal Coins Analyzed: {len(df_clean)}")
print(f"Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Mean of each custom score across all analysed coins
print("\n" + rule)
print("KEY STATISTICS")
print(rule)
print(f"Average Investment Score: {df_clean['investment_score'].mean():.2f}")
print(f"Average Momentum Score: {df_clean['momentum_score'].mean():.2f}")
print(f"Average Risk Score: {df_clean['risk_score'].mean():.2f}")

# The five coins with the highest investment score
print("\n" + rule)
print("TOP 5 INVESTMENT OPPORTUNITIES")
print(rule)
top5 = df_clean.nlargest(5, 'investment_score')[['coin_name', 'symbol', 'investment_score', 'predicted_signal']]
for _, row in top5.iterrows():
    print(f"  {row['symbol']:10s} - Score: {row['investment_score']:6.2f} - Signal: {row['predicted_signal']}")

# Count and percentage share of each predicted signal
print("\n" + rule)
print("SIGNAL DISTRIBUTION")
print(rule)
for signal, count in df_clean['predicted_signal'].value_counts().items():
    share = count / len(df_clean) * 100
    print(f"  {signal:15s}: {count:3d} ({share:.1f}%)")

print("\n" + rule)
print("SKILLS DEMONSTRATED")
print(rule)
skills = [
    "API Data Extraction (requests)",
    "Data Transformation (pandas)",
    "Data Cleaning and Preprocessing",
    "Statistical Analysis (scipy.stats)",
    "Custom Scoring Functions",
    "Outlier Detection (Z-score, IQR)",
    "Data Visualization (matplotlib)",
    "SQL Database Export (sqlite3)"
]
for skill in skills:
    print(f"  - {skill}")

print("\n" + banner)
print("ANALYSIS COMPLETE")
print(banner)