# SmartCoins SQL Analysis Portfolio

**Author:** Bienvenu Mwenyemali  
**Skills Demonstrated:** SQL, SQLite, Data Analysis, Business Intelligence

---

## Overview
This notebook demonstrates SQL analysis skills using cryptocurrency data from SmartCoins App.  
We use Python's sqlite3 to run SQL queries on our dataset.

**SQL Skills Covered:**
- SELECT, WHERE, ORDER BY
- Aggregate functions (COUNT, SUM, AVG, MIN, MAX)
- GROUP BY and HAVING
- CASE statements
- Subqueries
- Window functions (RANK, ROW_NUMBER)
- Common Table Expressions (CTEs)

## Section 1: Setup and Data Loading

In [1]:
import sqlite3
import pandas as pd
import requests
from datetime import datetime

print("Libraries imported successfully!")
print(f"Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

Libraries imported successfully!
Analysis Date: 2026-02-11 02:12:25


In [2]:
# Fetch data from API
API_URL = "https://smartcoinsapp.com/api/coins"
print(f"Fetching data from: {API_URL}")

response = requests.get(API_URL, timeout=30)
data = response.json()
print(f"Retrieved {len(data)} coins")

Fetching data from: https://smartcoinsapp.com/api/coins
Retrieved 5 coins


In [3]:
# Transform data to DataFrame
records = []
for coin in data:
    quote = coin.get('quote', {}).get('USD', {})
    signals = coin.get('signals', {})
    scores = coin.get('investmentScores', {})
    risk = coin.get('riskMetrics', {})
    
    record = {
        'coin_name': coin.get('name'),
        'symbol': coin.get('symbol'),
        'price_usd': quote.get('price', 0),
        'market_cap': quote.get('market_cap', 0),
        'volume_24h': quote.get('volume_24h', 0),
        'pct_change_24h': quote.get('percent_change_24h', 0),
        'pct_change_7d': quote.get('percent_change_7d', 0),
        'pct_change_30d': quote.get('percent_change_30d', 0),
        'coin_type': coin.get('coinType', 'Unknown'),
        'category': coin.get('category', 'Uncategorized'),
        'primary_signal': signals.get('primarySignal', 'NEUTRAL'),
        'signal_strength': signals.get('signalStrength', 0),
        'overall_score': signals.get('overallScore', 0),
        'composite_score': signals.get('compositeScore', 0),
        'change_momentum': signals.get('changeMomentum', 0),
        'price_volatility': risk.get('priceVolatility', 0),
        'volatility_risk': risk.get('volatilityRisk', 0),
        'liquidity_risk': risk.get('liquidityRisk', 0),
        'inv_momentum_score': scores.get('momentumScore', 0),
        'inv_risk_score': scores.get('riskScore', 0),
        'circulating_supply': coin.get('circulating_supply', 0),
    }
    records.append(record)

df = pd.DataFrame(records)
df = df.drop_duplicates(subset=['symbol'], keep='first')
print(f"DataFrame: {len(df)} rows, {len(df.columns)} columns")

AttributeError: 'str' object has no attribute 'get'

In [None]:
# Create SQLite database in memory
conn = sqlite3.connect(':memory:')
df.to_sql('coins', conn, if_exists='replace', index=False)
print("Database created successfully!")

# Helper function to run SQL and display results
def run_sql(query, description=""):
    if description:
        print(f"\n{description}")
        print("-" * 50)
    result = pd.read_sql_query(query, conn)
    return result

## Section 2: Basic SELECT Queries

In [None]:
# Query 1: Select all columns, first 10 rows
query = """
SELECT * 
FROM coins 
LIMIT 10;
"""
run_sql(query, "First 10 coins - All columns")

In [None]:
# Query 2: Select specific columns
query = """
SELECT coin_name, symbol, price_usd, market_cap, primary_signal
FROM coins
LIMIT 10;
"""
run_sql(query, "Select specific columns")

In [None]:
# Query 3: Count total records
query = """
SELECT COUNT(*) AS total_coins
FROM coins;
"""
run_sql(query, "Total number of coins")

## Section 3: Filtering with WHERE

In [None]:
# Query 4: Filter by signal
query = """
SELECT coin_name, symbol, price_usd, primary_signal, overall_score
FROM coins
WHERE primary_signal = 'BUY'
ORDER BY overall_score DESC;
"""
run_sql(query, "Coins with BUY signal")

In [None]:
# Query 5: Multiple conditions with AND
query = """
SELECT coin_name, symbol, price_usd, volatility_risk, overall_score
FROM coins
WHERE overall_score > 60 
  AND volatility_risk < 2
ORDER BY overall_score DESC;
"""
run_sql(query, "High score AND low risk coins")

In [None]:
# Query 6: Filter with OR and IN
query = """
SELECT coin_name, symbol, primary_signal, overall_score
FROM coins
WHERE primary_signal IN ('BUY', 'STRONG BUY')
   OR overall_score > 70
ORDER BY overall_score DESC
LIMIT 15;
"""
run_sql(query, "BUY signals OR high overall score")

In [None]:
# Query 7: Filter with BETWEEN
query = """
SELECT coin_name, symbol, price_usd, overall_score
FROM coins
WHERE price_usd BETWEEN 0.001 AND 1
ORDER BY price_usd DESC;
"""
run_sql(query, "Coins priced between $0.001 and $1")

## Section 4: Sorting with ORDER BY

In [None]:
# Query 8: Top 10 by market cap
query = """
SELECT coin_name, symbol, market_cap, volume_24h
FROM coins
WHERE market_cap > 0
ORDER BY market_cap DESC
LIMIT 10;
"""
run_sql(query, "Top 10 coins by Market Cap")

In [None]:
# Query 9: Top 10 by overall score
query = """
SELECT coin_name, symbol, overall_score, composite_score, primary_signal
FROM coins
ORDER BY overall_score DESC
LIMIT 10;
"""
run_sql(query, "Top 10 coins by Overall Score")

In [None]:
# Query 10: Bottom 10 by volatility risk (lowest risk)
query = """
SELECT coin_name, symbol, volatility_risk, liquidity_risk, primary_signal
FROM coins
ORDER BY volatility_risk ASC
LIMIT 10;
"""
run_sql(query, "Top 10 Lowest Risk coins")

## Section 5: Aggregate Functions

In [None]:
# Query 11: Basic aggregates
query = """
SELECT 
    COUNT(*) AS total_coins,
    ROUND(AVG(overall_score), 2) AS avg_score,
    ROUND(MIN(overall_score), 2) AS min_score,
    ROUND(MAX(overall_score), 2) AS max_score,
    ROUND(AVG(volatility_risk), 2) AS avg_risk
FROM coins;
"""
run_sql(query, "Summary Statistics")

In [None]:
# Query 12: Sum and average of market metrics
query = """
SELECT 
    ROUND(SUM(market_cap), 2) AS total_market_cap,
    ROUND(AVG(market_cap), 2) AS avg_market_cap,
    ROUND(SUM(volume_24h), 2) AS total_volume,
    ROUND(AVG(volume_24h), 2) AS avg_volume
FROM coins
WHERE market_cap > 0;
"""
run_sql(query, "Market Metrics Summary")

## Section 6: GROUP BY Analysis

In [None]:
# Query 13: Count by signal type
query = """
SELECT 
    primary_signal,
    COUNT(*) AS coin_count,
    ROUND(AVG(overall_score), 2) AS avg_score
FROM coins
GROUP BY primary_signal
ORDER BY coin_count DESC;
"""
run_sql(query, "Coins grouped by Signal")

In [None]:
# Query 14: Count by coin type
query = """
SELECT 
    coin_type,
    COUNT(*) AS coin_count,
    ROUND(AVG(overall_score), 2) AS avg_score,
    ROUND(AVG(volatility_risk), 2) AS avg_risk
FROM coins
GROUP BY coin_type
ORDER BY coin_count DESC;
"""
run_sql(query, "Analysis by Coin Type")

In [None]:
# Query 15: GROUP BY with HAVING
query = """
SELECT 
    coin_type,
    COUNT(*) AS coin_count,
    ROUND(AVG(overall_score), 2) AS avg_score
FROM coins
GROUP BY coin_type
HAVING COUNT(*) >= 3
ORDER BY avg_score DESC;
"""
run_sql(query, "Coin types with 3+ coins (HAVING)")

## Section 7: CASE Statements

In [None]:
# Query 16: Create price tier using CASE
query = """
SELECT 
    coin_name,
    symbol,
    price_usd,
    CASE 
        WHEN price_usd < 0.001 THEN 'Micro'
        WHEN price_usd < 1 THEN 'Low'
        WHEN price_usd < 100 THEN 'Medium'
        ELSE 'High'
    END AS price_tier
FROM coins
ORDER BY price_usd DESC
LIMIT 15;
"""
run_sql(query, "Price Tier Classification (CASE)")

In [None]:
# Query 17: Risk level classification
query = """
SELECT 
    coin_name,
    symbol,
    volatility_risk,
    CASE 
        WHEN volatility_risk < 0.5 THEN 'Low Risk'
        WHEN volatility_risk < 2 THEN 'Medium Risk'
        ELSE 'High Risk'
    END AS risk_level
FROM coins
ORDER BY volatility_risk ASC
LIMIT 15;
"""
run_sql(query, "Risk Level Classification")

In [None]:
# Query 18: Count by risk level
query = """
SELECT 
    CASE 
        WHEN volatility_risk < 0.5 THEN 'Low Risk'
        WHEN volatility_risk < 2 THEN 'Medium Risk'
        ELSE 'High Risk'
    END AS risk_level,
    COUNT(*) AS coin_count,
    ROUND(AVG(overall_score), 2) AS avg_score
FROM coins
GROUP BY risk_level
ORDER BY coin_count DESC;
"""
run_sql(query, "Distribution by Risk Level")

## Section 8: Subqueries

In [None]:
# Query 19: Coins above average score
query = """
SELECT coin_name, symbol, overall_score
FROM coins
WHERE overall_score > (SELECT AVG(overall_score) FROM coins)
ORDER BY overall_score DESC;
"""
run_sql(query, "Coins above average score (Subquery)")

In [None]:
# Query 20: Coins with lowest risk in each type
query = """
SELECT coin_name, symbol, coin_type, volatility_risk
FROM coins c1
WHERE volatility_risk = (
    SELECT MIN(volatility_risk) 
    FROM coins c2 
    WHERE c2.coin_type = c1.coin_type
)
ORDER BY coin_type;
"""
run_sql(query, "Lowest risk coin per type (Correlated Subquery)")

## Section 9: Window Functions

In [None]:
# Query 21: RANK by overall score
query = """
SELECT 
    RANK() OVER (ORDER BY overall_score DESC) AS rank,
    coin_name,
    symbol,
    overall_score,
    primary_signal
FROM coins
LIMIT 15;
"""
run_sql(query, "Ranked by Overall Score (RANK)")

In [None]:
# Query 22: ROW_NUMBER within coin type
query = """
SELECT 
    coin_type,
    ROW_NUMBER() OVER (PARTITION BY coin_type ORDER BY overall_score DESC) AS type_rank,
    coin_name,
    symbol,
    overall_score
FROM coins
WHERE coin_type IN ('token', 'coin')
LIMIT 20;
"""
run_sql(query, "Ranking within coin type (PARTITION BY)")

In [None]:
# Query 23: Running total of market cap
query = """
SELECT 
    coin_name,
    symbol,
    market_cap,
    SUM(market_cap) OVER (ORDER BY market_cap DESC) AS running_total
FROM coins
WHERE market_cap > 0
LIMIT 10;
"""
run_sql(query, "Running Total of Market Cap")

## Section 10: Common Table Expressions (CTEs)

In [None]:
# Query 24: CTE for top performers analysis
query = """
WITH top_performers AS (
    SELECT coin_name, symbol, overall_score, volatility_risk
    FROM coins
    WHERE overall_score > 60
),
low_risk AS (
    SELECT coin_name, symbol, overall_score, volatility_risk
    FROM top_performers
    WHERE volatility_risk < 2
)
SELECT * FROM low_risk
ORDER BY overall_score DESC;
"""
run_sql(query, "High performers with low risk (CTE)")

In [None]:
# Query 25: CTE with aggregation
query = """
WITH signal_stats AS (
    SELECT 
        primary_signal,
        COUNT(*) AS count,
        ROUND(AVG(overall_score), 2) AS avg_score,
        ROUND(AVG(volatility_risk), 2) AS avg_risk
    FROM coins
    GROUP BY primary_signal
)
SELECT 
    primary_signal,
    count,
    avg_score,
    avg_risk,
    ROUND(100.0 * count / SUM(count) OVER (), 2) AS percentage
FROM signal_stats
ORDER BY count DESC;
"""
run_sql(query, "Signal distribution with percentage (CTE)")

## Section 11: Business Intelligence Queries

In [None]:
# Query 26: Investment recommendation report
query = """
SELECT 
    RANK() OVER (ORDER BY overall_score DESC) AS rank,
    coin_name,
    symbol,
    coin_type,
    ROUND(price_usd, 8) AS price,
    overall_score,
    primary_signal,
    CASE 
        WHEN volatility_risk < 0.5 THEN 'Low'
        WHEN volatility_risk < 2 THEN 'Medium'
        ELSE 'High'
    END AS risk_level
FROM coins
WHERE overall_score > 50
ORDER BY overall_score DESC
LIMIT 20;
"""
run_sql(query, "Investment Recommendations Report")

In [None]:
# Query 27: Market overview dashboard
query = """
SELECT 
    'Total Coins' AS metric, CAST(COUNT(*) AS TEXT) AS value FROM coins
UNION ALL
SELECT 
    'Avg Overall Score', CAST(ROUND(AVG(overall_score), 2) AS TEXT) FROM coins
UNION ALL
SELECT 
    'BUY Signals', CAST(COUNT(*) AS TEXT) FROM coins WHERE primary_signal LIKE '%BUY%'
UNION ALL
SELECT 
    'SELL Signals', CAST(COUNT(*) AS TEXT) FROM coins WHERE primary_signal LIKE '%SELL%'
UNION ALL
SELECT 
    'Low Risk Coins', CAST(COUNT(*) AS TEXT) FROM coins WHERE volatility_risk < 0.5;
"""
run_sql(query, "Market Overview Dashboard")

In [None]:
# Query 28: Cross-tabulation - Signal vs Risk
query = """
SELECT 
    primary_signal,
    SUM(CASE WHEN volatility_risk < 0.5 THEN 1 ELSE 0 END) AS low_risk,
    SUM(CASE WHEN volatility_risk >= 0.5 AND volatility_risk < 2 THEN 1 ELSE 0 END) AS medium_risk,
    SUM(CASE WHEN volatility_risk >= 2 THEN 1 ELSE 0 END) AS high_risk,
    COUNT(*) AS total
FROM coins
GROUP BY primary_signal
ORDER BY total DESC;
"""
run_sql(query, "Signal vs Risk Level Cross-Tabulation")

## Section 12: Summary

In [None]:
print("=" * 60)
print("SQL ANALYSIS COMPLETE")
print("=" * 60)

print("\nSQL Skills Demonstrated:")
skills = [
    "SELECT, WHERE, ORDER BY, LIMIT",
    "Aggregate functions (COUNT, SUM, AVG, MIN, MAX)",
    "GROUP BY and HAVING clauses",
    "CASE statements for classification",
    "Subqueries (simple and correlated)",
    "Window functions (RANK, ROW_NUMBER, SUM OVER)",
    "Common Table Expressions (CTEs)",
    "UNION for combining results",
    "Cross-tabulation queries"
]
for skill in skills:
    print(f"  - {skill}")

print("\n" + "=" * 60)
print(f"Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("Author: Bienvenu Mwenyemali")
print("=" * 60)

In [None]:
# Close connection
conn.close()
print("Database connection closed.")