<a href="https://colab.research.google.com/github/harshi1707/FinalProject/blob/main/Tokenomics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

!pip -q install ccxt streamlit pyngrok plotly pandas scikit-learn ta --quiet

import os, time, random, math
from pyngrok import ngrok
import pandas as pd, numpy as np
import ccxt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import ta
from datetime import datetime, timedelta
import json

# Run configuration for this cell.
SYMBOL = "BTC/USDT"        # market symbol passed to the exchange's fetch_ohlcv
TIMEFRAME = "1h"           # candle interval passed to fetch_ohlcv
HOURS = 24 * 30            # number of candles requested; also the length of the synthetic fallback series
SEQ_LEN = 24               # look-back window length (rows) for the windowed features
NUM_PUMP_EVENTS = 6        # maximum number of injected pump-and-dump events
RANDOM_SEED = 42           # seed for `random`, NumPy and the IsolationForest
INJECT_FRAUD = True        # when False the fraud-injection step is skipped
CONTAMINATION = 0.02       # `contamination` argument of the IsolationForest
PLAYBACK_SPEED = 0.25      # default of the dashboard's "seconds per step" slider

# Seed both generators: fraud injection draws from `random`, the synthetic
# fallback series draws from `np.random`.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

def fetch_binance_ohlcv(symbol=SYMBOL, timeframe=TIMEFRAME, limit=1000):
    """Download recent OHLCV candles via ccxt and return them as a DataFrame.

    Note: despite the function name, this implementation queries KuCoin
    (``ccxt.kucoin()``).

    Args:
        symbol: Market symbol forwarded to ``fetch_ohlcv``.
        timeframe: Candle interval forwarded to ``fetch_ohlcv``.
        limit: Maximum number of candles requested.

    Returns:
        A DataFrame with columns ``datetime, open, high, low, close, volume``
        (``datetime`` parsed from the millisecond timestamp), or ``None`` if
        any step raised; the error is printed rather than propagated.
    """
    raw_columns = ['ts', 'open', 'high', 'low', 'close', 'volume']
    output_columns = ['datetime', 'open', 'high', 'low', 'close', 'volume']
    try:
        client = ccxt.kucoin()
        candles = client.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)
        frame = pd.DataFrame(candles, columns=raw_columns)
        frame['datetime'] = pd.to_datetime(frame['ts'], unit='ms')
        return frame[output_columns]
    except Exception as err:
        print("ccxt fetch failed:", err)
        return None

print("Fetching historical data...")
df = fetch_binance_ohlcv(limit=HOURS)

# A failed download or fewer than 200 candles is treated as unusable history;
# in that case a seeded random walk stands in for real prices.
if df is None or len(df) < 200:
    print("Falling back to synthetic random-walk data (no network or insufficient history).")
    end = datetime.utcnow()
    periods = HOURS
    # Hourly timestamps, oldest first, ending one hour before `end`.
    dates = [end - timedelta(hours=lag) for lag in range(periods, 0, -1)]
    price = 30000.0 + np.random.normal(0, 100, size=periods).cumsum()
    vol = np.random.uniform(50, 500, size=periods)
    synthetic = {
        'datetime': dates,
        'open': price,
        'high': price*1.002,
        'low': price*0.998,
        'close': price,
        'volume': vol,
    }
    df = pd.DataFrame(synthetic).sort_values('datetime').reset_index(drop=True)

print("Loaded rows:", len(df))

# Work on a copy so the fetched/synthetic prices in `df` stay untouched;
# `is_injected_fraud` is the ground-truth label for every modified row.
df_sim = df.copy().reset_index(drop=True)
df_sim['is_injected_fraud'] = 0

if INJECT_FRAUD:
    n = len(df_sim)
    # Candidate start rows exclude the first and last 30 rows of the series.
    possible_idx = list(range(30, n-30))
    random.shuffle(possible_idx)
    injected = []
    # Accepted events must start more than SEQ_LEN + 12 rows apart.
    min_gap = SEQ_LEN + 12
    for idx in possible_idx:
        if len(injected) >= NUM_PUMP_EVENTS:
            break
        if all(abs(idx - j) > min_gap for j in injected):
            injected.append(idx)
    for idx in injected:
        # NOTE: the order of the `random` calls below determines the generated
        # data for a given seed; reordering them changes the simulation.
        pump_len = random.choice([1,2,3])
        mag = random.uniform(1.12, 1.6)
        dump_len = pump_len + random.choice([1,2])
        # Pump: open/high/low/close are all set to the row's close times `mag`.
        for j in range(pump_len):
            i = min(idx + j, n-1)
            base = df_sim.loc[i, 'close']
            df_sim.loc[i, ['open','high','low','close']] = base * mag
            df_sim.loc[i, 'is_injected_fraud'] = 1
        # Dump: the rows right after the pump are set to 60-95% of the close
        # read from the original `df`.
        # NOTE(review): `df.loc[i]` is a label lookup; it matches df_sim's row i
        # only while `df` keeps a default 0..n-1 index — confirm.
        for j in range(dump_len):
            i = min(idx + pump_len + j, n-1)
            base = df.loc[i, 'close']
            df_sim.loc[i, ['open','high','low','close']] = base * random.uniform(0.6,0.95)
            df_sim.loc[i, 'is_injected_fraud'] = 1
        # With probability 0.4, also multiply volume by 3-10x on rows idx-1 .. idx+2.
        if random.random() < 0.4:
            for j in range(-1,3):
                i = min(max(idx + j, 0), n-1)
                df_sim.loc[i, 'volume'] *= random.uniform(3,10)
                df_sim.loc[i, 'is_injected_fraud'] = 1

print("Injected fraud points:", int(df_sim['is_injected_fraud'].sum()))

# Per-candle technical indicators computed on the (possibly manipulated) series.
df_feat = df_sim.copy()
close_series = df_feat['close']
volume_series = df_feat['volume']

# Indicators have a warm-up period; the leading gaps are back-filled.
df_feat['rsi'] = ta.momentum.rsi(close_series, window=14).bfill()
df_feat['ema20'] = ta.trend.ema_indicator(close_series, window=20).bfill()
bb = ta.volatility.BollingerBands(close_series, window=20, window_dev=2)
upper_band = bb.bollinger_hband()
lower_band = bb.bollinger_lband()
df_feat['bb_w'] = (upper_band - lower_band).bfill()

# Simple returns, and volume relative to its 20-row rolling mean (0 where undefined).
df_feat['returns'] = close_series.pct_change().fillna(0)
rolling_volume_mean = volume_series.rolling(20).mean()
df_feat['vol_norm'] = (volume_series - rolling_volume_mean).fillna(0)

df_feat = df_feat.dropna().reset_index(drop=True)

WINDOW = SEQ_LEN


def _window_row(pos):
    """Summarise the WINDOW rows strictly before `pos`; the timestamp and label come from row `pos`."""
    history = df_feat.iloc[pos - WINDOW:pos]
    return {
        'dt': df_feat['datetime'].iloc[pos],
        'close_mean': history['close'].mean(),
        'close_std': history['close'].std(),
        'vol_sum': history['volume'].sum(),
        'ret_mean': history['returns'].mean(),
        'rsi_mean': history['rsi'].mean(),
        'bbw_mean': history['bb_w'].mean(),
        'is_injected_fraud': int(df_feat['is_injected_fraud'].iloc[pos]),
    }


# One summary row per candle that has a full look-back window behind it.
rows = [_window_row(pos) for pos in range(WINDOW, len(df_feat))]
df_window = pd.DataFrame(rows).reset_index(drop=True)
print("Windowed dataset shape:", df_window.shape)

# Standardise the window features and score them with an Isolation Forest.
feature_columns = ['close_mean','close_std','vol_sum','ret_mean','rsi_mean','bbw_mean']
X = df_window[feature_columns].values
scaler = StandardScaler()
Xs = scaler.fit_transform(X)
iso = IsolationForest(contamination=CONTAMINATION, random_state=RANDOM_SEED)
iso.fit(Xs)

# decision_function is negated so that larger scores mean "more anomalous".
scores = np.negative(iso.decision_function(Xs))
df_window['anomaly_score'] = scores

# Static alert rule: anything above the 97.5th percentile of the scores.
threshold = np.percentile(df_window['anomaly_score'], 97.5)
df_window['is_alert'] = df_window['anomaly_score'].gt(threshold)
print("Alerts flagged:", int(df_window['is_alert'].sum()), "Threshold:", round(threshold,5))

# Persist the two tables the Streamlit dashboard reads.
price_columns = ['datetime','open','high','low','close','volume','is_injected_fraud']
df_feat[price_columns].to_csv("full_sim_prices.csv", index=False)
df_window.to_csv("streamlit_sim_data.csv", index=False)

# Source of the Streamlit dashboard, written to app.py below.  The literal is an
# f-string: PLAYBACK_SPEED is substituted when this cell runs, and doubled
# braces come out as single literal braces in the generated file.
app_code = f"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import time
from datetime import datetime

DATA_WINDOW = "streamlit_sim_data.csv"
PRICE_CSV = "full_sim_prices.csv"

st.set_page_config(layout="wide", page_title="Crypto Fraud Detection Dashboard")
st.title("🚨 Crypto Fraud Detection — Simulation Demo")
col1, col2, col3 = st.columns([1,1,1])
with col1:
    speed = st.slider("Playback speed (seconds per step)", 0.05, 2.0, value={PLAYBACK_SPEED}, step=0.05)
with col2:
    sensitivity = st.slider("Anomaly threshold percentile", 90, 99, value=97, step=1)
with col3:
    start = st.button("Start Simulation")

df_win = pd.read_csv(DATA_WINDOW, parse_dates=['dt'])
df_price = pd.read_csv(PRICE_CSV, parse_dates=['datetime'])
thr = np.percentile(df_win['anomaly_score'], sensitivity)
df_win['is_alert_dynamic'] = df_win['anomaly_score'] > thr

# df_win is shorter than df_price: the earliest candles have no full look-back
# window.  This offset maps a df_win row number to the matching df_price row.
row_offset = len(df_price) - len(df_win)

price_col, info_col = st.columns([2,1])

with price_col:
    st.subheader("Price chart (with model alerts)")
    price_chart = st.empty()
    total = len(df_win)
    cursor = 0
    while True:
        if not start and cursor==0:
            # Idle state: show the whole series with ground truth and all alerts.
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=df_price['datetime'], y=df_price['close'], mode='lines', name='Price'))
            gt = df_price[df_price['is_injected_fraud']==1]
            fig.add_trace(go.Scatter(x=gt['datetime'], y=gt['close'], mode='markers', name='Injected Fraud (GT)'))
            alerts_all = df_win[df_win['is_alert_dynamic']==True]
            fig.add_trace(go.Scatter(x=alerts_all['dt'], y=[df_price.set_index('datetime').loc[dt, 'close'] if dt in df_price['datetime'].values else None for dt in alerts_all['dt']],
                                     mode='markers', name='Model Alerts', marker=dict(color='red', size=8)))
            fig.update_layout(height=600, margin={{"r":0,"t":30,"l":0,"b":0}})
            price_chart.plotly_chart(fig, use_container_width=True)
            break
        if start:
            if cursor >= total:
                st.info("Simulation finished — restart to run again.")
                break
            cur_dt = df_win['dt'].iloc[cursor]
            # Position of the replayed candle in df_price.  Using the raw cursor
            # here made the view trail the current timestamp.
            pos = min(len(df_price)-1, max(0, cursor + row_offset))
            window_start = df_price['datetime'].iloc[max(0, pos - 24*7)]
            window_end = df_price['datetime'].iloc[min(len(df_price)-1, pos + 12)]
            view = df_price[(df_price['datetime'] >= window_start) & (df_price['datetime'] <= window_end)]
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=view['datetime'], y=view['close'], mode='lines', name='Price'))
            gt_view = df_price[(df_price['datetime'] >= window_start) & (df_price['datetime'] <= window_end) & (df_price['is_injected_fraud']==1)]
            if len(gt_view) > 0:
                fig.add_trace(go.Scatter(x=gt_view['datetime'], y=gt_view['close'], mode='markers', name='Injected Fraud (GT)'))
            alerts_up_to_now = df_win[(df_win['is_alert_dynamic']==True) & (df_win['dt'] <= cur_dt)]
            if len(alerts_up_to_now) > 0:
                alert_times = alerts_up_to_now['dt'].tolist()
                alert_prices = []
                for at in alert_times:
                    # Plot each alert at the close of the nearest price candle.
                    nearest = df_price.iloc[(df_price['datetime'] - pd.to_datetime(at)).abs().argsort()[:1]]
                    alert_prices.append(nearest['close'].values[0])
                fig.add_trace(go.Scatter(x=alert_times, y=alert_prices, mode='markers', name='Model Alerts'))
            fig.update_layout(height=600, margin={{"r":0,"t":30,"l":0,"b":0}})
            price_chart.plotly_chart(fig, use_container_width=True)
            cursor += 1
            time.sleep(speed)
        else:
            break

with info_col:
    st.subheader("Model & Alerts")
    st.write("Anomaly sensitivity percentile:", sensitivity)
    st.write("Total windows:", len(df_win))
    st.write("Total dynamic alerts:", int(df_win['is_alert_dynamic'].sum()))
    st.write("Static (original) flagged alerts:", int((df_win['anomaly_score'] > df_win['anomaly_score'].quantile(0.975)).sum()))
    st.write("---")
    st.subheader("Recent Alerts (table)")
    recent_alerts = df_win[df_win['is_alert_dynamic']==True].sort_values('dt', ascending=False).head(10)
    st.dataframe(recent_alerts[['dt','anomaly_score','is_injected_fraud']].rename(columns={{'dt':'datetime'}}))
"""

# The source contains non-ASCII characters (emoji, em dash), so write it as
# UTF-8 explicitly instead of relying on the platform's default encoding.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

# Best-effort cleanup of any ngrok process left over from a previous run.
try:
    ngrok.kill()
except Exception:
    pass

os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
os.environ["STREAMLIT_BROWSER_GALLERY"] = "false"
os.environ["STREAMLIT_BROWSER_SERVER_ADDRESS"] = "127.0.0.1"

# The ngrok auth token is a credential and must not be stored in the notebook.
# Read it from the NGROK_AUTH_TOKEN environment variable, or prompt for it
# without echoing when the variable is not set.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN")
if not NGROK_AUTH_TOKEN:
    from getpass import getpass
    NGROK_AUTH_TOKEN = getpass("Enter your ngrok auth token: ")
if not NGROK_AUTH_TOKEN:
    raise RuntimeError("An ngrok auth token is required; set the NGROK_AUTH_TOKEN environment variable.")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

print("Starting Streamlit...")
# Start Streamlit in the background, then expose its port (8501) through ngrok.
get_ipython().system_raw("streamlit run app.py &")
public_url = ngrok.connect(8501).public_url
print("Dashboard URL:", public_url)



Fetching historical data...
Loaded rows: 720
Injected fraud points: 33
Windowed dataset shape: (696, 8)
Alerts flagged: 18 Threshold: -0.00276
Starting Streamlit...
Dashboard URL: https://761f2fa0456e.ngrok-free.app


In [None]:
!pip -q install ccxt streamlit np pyngrok plotly pandas scikit-learn ta tensorflow --quiet

import os, time, random
from pyngrok import ngrok
import pandas as pd
import numpy as np
import ccxt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import ta
from datetime import datetime, timedelta
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Run configuration for this cell.
SYMBOL = "BTC/USDT"        # market symbol passed to the exchange's fetch_ohlcv
TIMEFRAME = "1h"           # candle interval passed to fetch_ohlcv
HOURS = 24 * 30            # number of candles requested; also the length of the synthetic fallback series
SEQ_LEN = 24               # look-back window length (rows) for the windowed features
NUM_PUMP_EVENTS = 6        # maximum number of injected pump-and-dump events
RANDOM_SEED = 42           # seed for `random`, NumPy, TensorFlow and the IsolationForest
INJECT_FRAUD = True        # when False the fraud-injection step is skipped
CONTAMINATION = 0.02       # `contamination` argument of the IsolationForest
PLAYBACK_SPEED = 0.25      # default of the dashboard's "seconds per step" slider
ROLLING_WINDOW = 50        # window (rows) of the rolling mean/std threshold on the IF scores

# Seed every random source used in this cell.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Fetch or generate OHLCV data
def fetch_binance_ohlcv(symbol=SYMBOL, timeframe=TIMEFRAME, limit=1000):
    """Fetch OHLCV candles through ccxt and return them as a DataFrame.

    Note: despite the function name, this implementation queries KuCoin
    (``ccxt.kucoin()``).

    Args:
        symbol: Market symbol forwarded to ``fetch_ohlcv``.
        timeframe: Candle interval forwarded to ``fetch_ohlcv``.
        limit: Maximum number of candles requested.

    Returns:
        A DataFrame with columns ``datetime, open, high, low, close, volume``,
        or ``None`` when anything raised (the error is printed).
    """
    keep = ['datetime', 'open', 'high', 'low', 'close', 'volume']
    try:
        candles = ccxt.kucoin().fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)
        table = pd.DataFrame(candles, columns=['ts', 'open', 'high', 'low', 'close', 'volume'])
        table['datetime'] = pd.to_datetime(table['ts'], unit='ms')
    except Exception as err:
        print("ccxt fetch failed:", err)
        return None
    return table[keep]

df = fetch_binance_ohlcv(limit=HOURS)

# Without at least 200 real candles, substitute a seeded random-walk series.
if df is None or len(df) < 200:
    end = datetime.utcnow()
    periods = HOURS
    # Hourly timestamps, oldest first, ending one hour before `end`.
    dates = [end - timedelta(hours=lag) for lag in range(periods, 0, -1)]
    price = 30000.0 + np.random.normal(0, 100, size=periods).cumsum()
    vol = np.random.uniform(50, 500, size=periods)
    df = pd.DataFrame({
        'datetime': dates,
        'open': price,
        'high': price*1.002,
        'low': price*0.998,
        'close': price,
        'volume': vol,
    })

# Work on a copy so the prices in `df` stay untouched; `is_injected_fraud`
# is the ground-truth label for every modified row.
df_sim = df.copy().reset_index(drop=True)
df_sim['is_injected_fraud'] = 0

# Inject synthetic pump/dump fraud events
if INJECT_FRAUD:
    n = len(df_sim)
    # Candidate start rows exclude the first and last 30 rows of the series.
    possible_idx = list(range(30, n-30))
    random.shuffle(possible_idx)
    injected = []
    # Accepted events must start more than SEQ_LEN + 12 rows apart.
    min_gap = SEQ_LEN + 12
    for idx in possible_idx:
        if len(injected) >= NUM_PUMP_EVENTS:
            break
        if all(abs(idx - j) > min_gap for j in injected):
            injected.append(idx)
    for idx in injected:
        # NOTE: the order of the `random` calls below determines the generated
        # data for a given seed; reordering them changes the simulation.
        pump_len = random.choice([1,2,3])
        mag = random.uniform(1.12, 1.6)
        dump_len = pump_len + random.choice([1,2])
        # Pump: open/high/low/close are all set to the row's close times `mag`.
        for j in range(pump_len):
            i = min(idx + j, n-1)
            base = df_sim.loc[i, 'close']
            df_sim.loc[i, ['open','high','low','close']] = base * mag
            df_sim.loc[i, 'is_injected_fraud'] = 1
        # Dump: the rows right after the pump are set to 60-95% of the close
        # read from the original `df`.
        # NOTE(review): `df.loc[i]` is a label lookup; it matches df_sim's row i
        # only while `df` keeps a default 0..n-1 index — confirm.
        for j in range(dump_len):
            i = min(idx + pump_len + j, n-1)
            base = df.loc[i, 'close']
            df_sim.loc[i, ['open','high','low','close']] = base * random.uniform(0.6,0.95)
            df_sim.loc[i, 'is_injected_fraud'] = 1
        # With probability 0.4, also multiply volume by 3-10x on rows idx-1 .. idx+2.
        if random.random() < 0.4:
            for j in range(-1,3):
                i = min(max(idx + j, 0), n-1)
                df_sim.loc[i, 'volume'] *= random.uniform(3,10)
                df_sim.loc[i, 'is_injected_fraud'] = 1

# Feature engineering: per-candle indicators on the (possibly manipulated) series.
df_feat = df_sim.copy()
close_series = df_feat['close']
volume_series = df_feat['volume']

# Indicator warm-up gaps at the start of the series are back-filled.
df_feat['rsi'] = ta.momentum.rsi(close_series, window=14).bfill()
df_feat['ema20'] = ta.trend.ema_indicator(close_series, window=20).bfill()
bb = ta.volatility.BollingerBands(close_series, window=20, window_dev=2)
band_width = bb.bollinger_hband() - bb.bollinger_lband()
df_feat['bb_w'] = band_width.bfill()

# Simple returns, and volume relative to its 20-row rolling mean (0 where undefined).
df_feat['returns'] = close_series.pct_change().fillna(0)
df_feat['vol_norm'] = (volume_series - volume_series.rolling(20).mean()).fillna(0)

df_feat = df_feat.dropna().reset_index(drop=True)

# Windowed features
WINDOW = SEQ_LEN


def _window_row(pos):
    """Aggregate the WINDOW rows strictly before `pos`; the timestamp and label come from row `pos`."""
    history = df_feat.iloc[pos - WINDOW:pos]
    return {
        'dt': df_feat['datetime'].iloc[pos],
        'close_mean': history['close'].mean(),
        'close_std': history['close'].std(),
        'vol_sum': history['volume'].sum(),
        'ret_mean': history['returns'].mean(),
        'rsi_mean': history['rsi'].mean(),
        'bbw_mean': history['bb_w'].mean(),
        'is_injected_fraud': int(df_feat['is_injected_fraud'].iloc[pos]),
    }


# One summary row per candle that has a full look-back window behind it.
rows = [_window_row(pos) for pos in range(WINDOW, len(df_feat))]
df_window = pd.DataFrame(rows).reset_index(drop=True)

# Standardise the six window features before modelling.
feature_columns = ['close_mean','close_std','vol_sum','ret_mean','rsi_mean','bbw_mean']
X = df_window[feature_columns].values
scaler = StandardScaler()
Xs = scaler.fit_transform(X)

# Isolation Forest: decision_function is negated so larger means more anomalous.
iso = IsolationForest(contamination=CONTAMINATION, random_state=RANDOM_SEED)
iso.fit(Xs)
scores_if = np.negative(iso.decision_function(Xs))

# LSTM Autoencoder
# Each sample's feature vector is fed to the LSTM as a length-n_features
# sequence of scalars; the Dense head reconstructs the feature vector.
n_samples, n_features = Xs.shape[0], Xs.shape[1]
X_lstm = Xs.reshape((n_samples, n_features, 1))

lstm_model = Sequential([
    LSTM(32, activation='relu', input_shape=(n_features, 1)),
    Dense(n_features),
])
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.fit(X_lstm, X_lstm, epochs=20, batch_size=16, verbose=0)

# Anomaly score = mean squared reconstruction error per sample.
recon = lstm_model.predict(X_lstm)
reconstruction_error = X_lstm[:, :, 0] - recon
scores_lstm = np.mean(np.square(reconstruction_error), axis=1)

df_window['score_if'] = scores_if
df_window['score_lstm'] = scores_lstm

# Thresholds for the Isolation Forest scores, stored as columns so the
# dashboard can read them back from the CSV.
if_rolling = pd.Series(scores_if).rolling(ROLLING_WINDOW)
df_window['threshold_percentile_if'] = np.percentile(scores_if, 97.5)
df_window['threshold_zscore_if'] = np.mean(scores_if) + 2.5 * np.std(scores_if)
# Rolling statistics are undefined (NaN) until ROLLING_WINDOW scores are available.
df_window['rolling_mean_if'] = if_rolling.mean()
df_window['rolling_std_if'] = if_rolling.std()
df_window['threshold_rolling_if'] = df_window['rolling_mean_if'] + 2.5 * df_window['rolling_std_if']

# Persist the two tables the Streamlit dashboard reads.
price_columns = ['datetime','open','high','low','close','volume','is_injected_fraud']
df_feat[price_columns].to_csv("full_sim_prices.csv", index=False)
df_window.to_csv("streamlit_sim_data.csv", index=False)

# Streamlit app code, written to app.py below.  The literal is an f-string:
# PLAYBACK_SPEED and ROLLING_WINDOW are substituted when this cell runs, and
# doubled braces come out as single literal braces in the generated file.
app_code = f"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import time

DATA_WINDOW = "streamlit_sim_data.csv"
PRICE_CSV = "full_sim_prices.csv"

st.set_page_config(layout="wide", page_title="Crypto Fraud Detection Dashboard")
st.title("🚨 Crypto Fraud Detection — Simulation Demo")

col1, col2, col3 = st.columns([1,1,1])
with col1:
    speed = st.slider("Playback speed (seconds per step)", 0.05, 2.0, value={PLAYBACK_SPEED}, step=0.05)
with col2:
    model_choice = st.selectbox("Select Model", ["Isolation Forest", "LSTM"])
with col3:
    threshold_method = st.selectbox("Select Threshold Method", ["Percentile", "Z-score", "Rolling"])

with st.sidebar:
    # NOTE: the button value is not consulted below; playback runs on every rerun.
    start = st.button("Start Simulation")

df_win = pd.read_csv(DATA_WINDOW, parse_dates=['dt'])
df_price = pd.read_csv(PRICE_CSV, parse_dates=['datetime'])

if model_choice=="Isolation Forest":
    score_col = 'score_if'
    if threshold_method=="Percentile":
        thr = df_win['threshold_percentile_if'].iloc[0]
    elif threshold_method=="Z-score":
        thr = df_win['threshold_zscore_if'].iloc[0]
    else:
        thr = df_win['threshold_rolling_if']
else:
    score_col = 'score_lstm'
    # LSTM thresholds are derived from the LSTM scores with the same three rules.
    if threshold_method=="Percentile":
        thr = np.percentile(df_win[score_col], 97.5)
    elif threshold_method=="Z-score":
        thr = df_win[score_col].mean() + 2.5*df_win[score_col].std()
    else:
        # Previously "Rolling" silently fell back to the z-score rule here.
        lstm_rolling = df_win[score_col].rolling({ROLLING_WINDOW})
        thr = lstm_rolling.mean() + 2.5*lstm_rolling.std()

df_win['is_alert_dynamic'] = df_win[score_col] > thr

# df_win is shorter than df_price: the earliest candles have no full look-back
# window.  This offset maps a df_win row number to the matching df_price row.
row_offset = len(df_price) - len(df_win)

price_col, info_col = st.columns([2,1])
with price_col:
    st.subheader("Price chart (with model alerts)")
    price_chart = st.empty()
    total = len(df_win)
    cursor = 0
    while True:
        if cursor >= total:
            st.info("Simulation finished — restart to run again.")
            break
        cur_dt = df_win['dt'].iloc[cursor]
        # Position of the replayed candle in df_price.  Using the raw cursor
        # here made the view trail the current timestamp.
        pos = min(len(df_price)-1, max(0, cursor + row_offset))
        window_start = df_price['datetime'].iloc[max(0, pos - 24*7)]
        window_end = df_price['datetime'].iloc[min(len(df_price)-1, pos + 12)]
        view = df_price[(df_price['datetime'] >= window_start) & (df_price['datetime'] <= window_end)]
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=view['datetime'], y=view['close'], mode='lines', name='Price'))
        gt_view = df_price[(df_price['datetime'] >= window_start) & (df_price['datetime'] <= window_end) & (df_price['is_injected_fraud']==1)]
        if len(gt_view) > 0:
            fig.add_trace(go.Scatter(x=gt_view['datetime'], y=gt_view['close'], mode='markers', name='Injected Fraud (GT)'))
        alerts_up_to_now = df_win[(df_win['is_alert_dynamic']==True) & (df_win['dt'] <= cur_dt)]
        if len(alerts_up_to_now) > 0:
            alert_times = alerts_up_to_now['dt'].tolist()
            alert_prices = []
            for at in alert_times:
                # Plot each alert at the close of the nearest price candle.
                nearest = df_price.iloc[(df_price['datetime'] - pd.to_datetime(at)).abs().argsort()[:1]]
                alert_prices.append(nearest['close'].values[0])
            fig.add_trace(go.Scatter(x=alert_times, y=alert_prices, mode='markers', name='Model Alerts'))
        fig.update_layout(height=600, margin={{"r":0,"t":30,"l":0,"b":0}})
        price_chart.plotly_chart(fig, use_container_width=True)
        cursor += 1
        time.sleep(speed)

with info_col:
    st.subheader("Model & Alerts")
    st.write("Model:", model_choice)
    st.write("Threshold method:", threshold_method)
    st.write("Total windows:", len(df_win))
    st.write("Total dynamic alerts:", int(df_win['is_alert_dynamic'].sum()))
    st.subheader("Recent Alerts (table)")
    recent_alerts = df_win[df_win['is_alert_dynamic']==True].sort_values('dt', ascending=False).head(10)
    # Show the score column of the selected model; the CSV has no 'anomaly_score' column.
    st.dataframe(recent_alerts[['dt', score_col, 'is_injected_fraud']].rename(columns={{'dt':'datetime'}}))
"""

# The source contains non-ASCII characters (emoji, em dash), so write it as
# UTF-8 explicitly instead of relying on the platform's default encoding.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

# Best-effort cleanup of any ngrok process left over from a previous run.
# Only ordinary errors are ignored, so Ctrl-C / kernel interrupts still work.
try:
    ngrok.kill()
except Exception:
    pass

os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
os.environ["STREAMLIT_BROWSER_GALLERY"] = "false"
os.environ["STREAMLIT_BROWSER_SERVER_ADDRESS"] = "127.0.0.1"

# The ngrok auth token is a credential and must not be stored in the notebook.
# Read it from the NGROK_AUTH_TOKEN environment variable, or prompt for it
# without echoing when the variable is not set.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN")
if not NGROK_AUTH_TOKEN:
    from getpass import getpass
    NGROK_AUTH_TOKEN = getpass("Enter your ngrok auth token: ")
if not NGROK_AUTH_TOKEN:
    raise RuntimeError("An ngrok auth token is required; set the NGROK_AUTH_TOKEN environment variable.")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Start Streamlit in the background, then expose its port (8501) through ngrok.
get_ipython().system_raw("streamlit run app.py &")
public_url = ngrok.connect(8501).public_url
print("Dashboard URL:", public_url)


In [None]:
"""# Install dependencies
!pip -q install ccxt streamlit pyngrok plotly pandas scikit-learn ta tensorflow --quiet

import os, time, random
from pyngrok import ngrok
import pandas as pd
import numpy as np
import ccxt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import ta
from datetime import datetime, timedelta
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed

# -------------------------
# PARAMETERS
# -------------------------
SYMBOL = "BTC/USDT"
TIMEFRAME = "1h"
HOURS = 24*30
SEQ_LEN = 24
NUM_PUMP_EVENTS = 6
RANDOM_SEED = 42
INJECT_FRAUD = True
CONTAMINATION = 0.02
PLAYBACK_SPEED = 0.25

random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# -------------------------
# FETCH DATA / SIMULATION
# -------------------------
def fetch_binance_ohlcv(symbol=SYMBOL, timeframe=TIMEFRAME, limit=1000):
    try:
        exchange = ccxt.binance()
        ohlcv = exchange.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)
        df = pd.DataFrame(ohlcv, columns=['ts','open','high','low','close','volume'])
        df['datetime'] = pd.to_datetime(df['ts'], unit='ms')
        df = df[['datetime','open','high','low','close','volume']]
        return df
    except:
        return None

df = fetch_binance_ohlcv(limit=HOURS)
if df is None or len(df)<200:
    end = datetime.utcnow()
    periods = HOURS
    dates = [end - timedelta(hours=(periods - i)) for i in range(periods)]
    price = 30000.0 + np.cumsum(np.random.normal(0,100,size=periods))
    vol = np.random.uniform(50,500,size=periods)
    df = pd.DataFrame({'datetime':dates,'open':price,'high':price*1.002,'low':price*0.998,'close':price,'volume':vol})
    df = df.sort_values('datetime').reset_index(drop=True)

# -------------------------
# INJECT FRAUD EVENTS
# -------------------------
df_sim = df.copy().reset_index(drop=True)
df_sim['is_injected_fraud'] = 0
if INJECT_FRAUD:
    n = len(df_sim)
    possible_idx = list(range(30, n-30))
    random.shuffle(possible_idx)
    injected = []
    min_gap = SEQ_LEN + 12
    for idx in possible_idx:
        if len(injected) >= NUM_PUMP_EVENTS:
            break
        if all(abs(idx - j) > min_gap for j in injected):
            injected.append(idx)
    for idx in injected:
        pump_len = random.choice([1,2,3])
        mag = random.uniform(1.12,1.6)
        dump_len = pump_len + random.choice([1,2])
        for j in range(pump_len):
            i = min(idx+j, n-1)
            base = df_sim.loc[i,'close']
            df_sim.loc[i,['open','high','low','close']] = base*mag
            df_sim.loc[i,'is_injected_fraud'] = 1
        for j in range(dump_len):
            i = min(idx+pump_len+j, n-1)
            base = df.loc[i,'close']
            df_sim.loc[i,['open','high','low','close']] = base*random.uniform(0.6,0.95)
            df_sim.loc[i,'is_injected_fraud'] = 1
        if random.random()<0.4:
            for j in range(-1,3):
                i = min(max(idx+j,0), n-1)
                df_sim.loc[i,'volume'] *= random.uniform(3,10)
                df_sim.loc[i,'is_injected_fraud'] = 1

# -------------------------
# FEATURE ENGINEERING
# -------------------------
df_feat = df_sim.copy()
df_feat['rsi'] = ta.momentum.rsi(df_feat['close'], window=14).bfill()
df_feat['ema20'] = ta.trend.ema_indicator(df_feat['close'], window=20).bfill()
bb = ta.volatility.BollingerBands(df_feat['close'], window=20, window_dev=2)
df_feat['bb_w'] = (bb.bollinger_hband() - bb.bollinger_lband()).bfill()
df_feat['returns'] = df_feat['close'].pct_change().fillna(0)
df_feat['vol_norm'] = (df_feat['volume'] - df_feat['volume'].rolling(20).mean()).fillna(0)
df_feat = df_feat.dropna().reset_index(drop=True)

# -------------------------
# CREATE WINDOWED DATA
# -------------------------
WINDOW = SEQ_LEN
rows = []
for i in range(WINDOW, len(df_feat)):
    win = df_feat.iloc[i-WINDOW:i]
    row = {
        'dt': df_feat['datetime'].iloc[i],
        'close_mean': win['close'].mean(),
        'close_std': win['close'].std(),
        'vol_sum': win['volume'].sum(),
        'ret_mean': win['returns'].mean(),
        'rsi_mean': win['rsi'].mean(),
        'bbw_mean': win['bb_w'].mean(),
        'is_injected_fraud': int(df_feat['is_injected_fraud'].iloc[i])
    }
    rows.append(row)
df_window = pd.DataFrame(rows).reset_index(drop=True)

# -------------------------
# ISOLATION FOREST
# -------------------------
X = df_window[['close_mean','close_std','vol_sum','ret_mean','rsi_mean','bbw_mean']].values
scaler = StandardScaler()
Xs = scaler.fit_transform(X)
iso = IsolationForest(contamination=CONTAMINATION, random_state=RANDOM_SEED)
iso.fit(Xs)
scores_iso = -iso.decision_function(Xs)
df_window['anomaly_score_iso'] = scores_iso
threshold_iso = np.percentile(df_window['anomaly_score_iso'], 97.5)
df_window['is_alert_iso'] = df_window['anomaly_score_iso'] > threshold_iso

# -------------------------
# LSTM AUTOENCODER
# -------------------------
# Prepare sequence data
seq_features = ['close_mean','close_std','vol_sum','ret_mean','rsi_mean','bbw_mean']
data_seq = df_window[seq_features].values
scaler_lstm = StandardScaler()
data_seq = scaler_lstm.fit_transform(data_seq)
X_seq = []
for i in range(len(data_seq)-SEQ_LEN):
    X_seq.append(data_seq[i:i+SEQ_LEN])
X_seq = np.array(X_seq)

# Train LSTM autoencoder
model = Sequential([
    LSTM(32, activation='relu', input_shape=(SEQ_LEN, len(seq_features)), return_sequences=False),
    RepeatVector(SEQ_LEN),
    LSTM(32, activation='relu', return_sequences=True),
    TimeDistributed(Dense(len(seq_features)))
])
model.compile(optimizer='adam', loss='mse')
model.fit(X_seq, X_seq, epochs=20, batch_size=32, verbose=0)

# Compute reconstruction error
X_pred = model.predict(X_seq, verbose=0)
mse = np.mean(np.square(X_pred - X_seq), axis=(1,2))
mse_full = np.concatenate([np.zeros(SEQ_LEN), mse])  # pad to align with df_window
df_window['anomaly_score_lstm'] = mse_full
threshold_lstm = np.percentile(df_window['anomaly_score_lstm'], 97.5)
df_window['is_alert_lstm'] = df_window['anomaly_score_lstm'] > threshold_lstm

# -------------------------
# SAVE FOR STREAMLIT
# -------------------------
df_feat[['datetime','open','high','low','close','volume','is_injected_fraud']].to_csv("full_sim_prices.csv", index=False)
df_window.to_csv("streamlit_sim_data.csv", index=False)

# -------------------------
# STREAMLIT APP
# -------------------------
# The dashboard source uses triple-single-quote delimiters on purpose: this cell
# is wrapped in a triple-double-quoted string to disable it, and an inner
# triple-double-quote would end that wrapper early and run the lines below.
app_code = f'''
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import time

DATA_WINDOW = "streamlit_sim_data.csv"
PRICE_CSV = "full_sim_prices.csv"

st.set_page_config(layout="wide", page_title="Crypto Fraud Detection Dashboard")
st.title("🚨 Crypto Fraud Detection — Simulation Demo")

model_choice = st.radio("Choose Model", ['Isolation Forest','LSTM Autoencoder'])
col1, col2 = st.columns([1,1])
with col1:
    speed = st.slider("Playback speed (seconds per step)", 0.05, 2.0, value={PLAYBACK_SPEED}, step=0.05)
with col2:
    sensitivity = st.slider("Anomaly threshold percentile", 90, 99, value=97, step=1)

df_win = pd.read_csv(DATA_WINDOW, parse_dates=['dt'])
df_price = pd.read_csv(PRICE_CSV, parse_dates=['datetime'])

# Select anomaly score based on model
score_col = 'anomaly_score_iso' if model_choice=='Isolation Forest' else 'anomaly_score_lstm'
alert_col = 'is_alert_iso' if model_choice=='Isolation Forest' else 'is_alert_lstm'
thr = np.percentile(df_win[score_col], sensitivity)
df_win['is_alert_dynamic'] = df_win[score_col] > thr

price_col, info_col = st.columns([2,1])
with price_col:
    st.subheader("Price chart (with model alerts)")
    price_chart = st.empty()
    total = len(df_win)
    cursor = 0
    while True:
        if cursor >= total:
            st.info("Simulation finished — restart to run again.")
            break
        cur_dt = df_win['dt'].iloc[cursor]
        window_start = df_price['datetime'].iloc[max(0,cursor-24*7)]
        window_end = df_price['datetime'].iloc[min(len(df_price)-1, cursor+12)]
        view = df_price[(df_price['datetime']>=window_start)&(df_price['datetime']<=window_end)]
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=view['datetime'], y=view['close'], mode='lines', name='Price'))
        gt_view = df_price[(df_price['datetime']>=window_start)&(df_price['datetime']<=window_end)&(df_price['is_injected_fraud']==1)]
        if len(gt_view)>0:
            fig.add_trace(go.Scatter(x=gt_view['datetime'], y=gt_view['close'], mode='markers', name='Injected Fraud (GT)'))
        alerts_up_to_now = df_win[(df_win['is_alert_dynamic']==True)&(df_win['dt']<=cur_dt)]
        if len(alerts_up_to_now)>0:
            alert_times = alerts_up_to_now['dt'].tolist()
            alert_prices = []
            for at in alert_times:
                nearest = df_price.iloc[(df_price['datetime']-pd.to_datetime(at)).abs().argsort()[:1]]
                alert_prices.append(nearest['close'].values[0])
            fig.add_trace(go.Scatter(x=alert_times, y=alert_prices, mode='markers', name='Model Alerts'))
        fig.update_layout(height=600, margin={{"r":0,"t":30,"l":0,"b":0}})
        price_chart.plotly_chart(fig, use_container_width=True)
        cursor += 1
        time.sleep(speed)

with info_col:
    st.subheader("Model & Alerts")
    st.write("Chosen model:", model_choice)
    st.write("Sensitivity percentile:", sensitivity)
    st.write("Total windows:", len(df_win))
    st.write("Total dynamic alerts:", int(df_win['is_alert_dynamic'].sum()))
    st.write("Static flagged alerts:", int((df_win[score_col] > df_win[score_col].quantile(0.975)).sum()))
    st.subheader("Recent Alerts (table)")
    recent_alerts = df_win[df_win['is_alert_dynamic']==True].sort_values('dt', ascending=False).head(10)
    st.dataframe(recent_alerts[['dt','anomaly_score_iso','anomaly_score_lstm','is_injected_fraud']])
'''

with open("app.py","w") as f:
    f.write(app_code)

# -------------------------
# NGROK / STREAMLIT
# -------------------------
try: ngrok.kill()
except: pass
os.environ["STREAMLIT_SERVER_HEADLESS"]="true"

NGROK_AUTH_TOKEN = "31Ecjc9XKs71emAe3pDtgz1H0Db_6vUMxYjF1kXsSarm5Fraf"
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

get_ipython().system_raw("streamlit run app.py &")
public_url = ngrok.connect(8501).public_url
print("Dashboard URL:", public_url)"""


In [None]:
!pip -q install ccxt streamlit pyngrok plotly pandas scikit-learn ta tensorflow --quiet

import os, time, random
from pyngrok import ngrok
import pandas as pd
import numpy as np
import ccxt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import ta
from datetime import datetime, timedelta
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Run configuration for this cell.
SYMBOL = "BTC/USDT"        # market symbol passed to the exchange's fetch_ohlcv
TIMEFRAME = "1h"           # candle interval passed to fetch_ohlcv
HOURS = 24 * 30            # number of candles requested; also the length of the synthetic fallback series
SEQ_LEN = 24               # contributes to the minimum spacing between injected events (SEQ_LEN + 12 rows)
NUM_PUMP_EVENTS = 6        # maximum number of injected pump-and-dump events
RANDOM_SEED = 42           # seed for `random`, NumPy and TensorFlow
INJECT_FRAUD = True        # when False the fraud-injection step is skipped
CONTAMINATION = 0.02       # presumably the IsolationForest contamination — usage is outside this view
PLAYBACK_SPEED = 0.25      # presumably the dashboard's default step delay — usage is outside this view
ROLLING_WINDOW = 50        # presumably the rolling-threshold window — usage is outside this view

# Seed every random source used in this cell.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Fetch or generate OHLCV data
def fetch_binance_ohlcv(symbol=SYMBOL, timeframe=TIMEFRAME, limit=1000):
    """Download OHLCV candles from Binance through ccxt.

    Returns a DataFrame with the columns datetime, open, high, low, close
    and volume. Any exception raised along the way is printed and turned
    into a ``None`` return so the caller can fall back to synthetic data.
    """
    value_fields = ['open', 'high', 'low', 'close', 'volume']
    try:
        candles = ccxt.binance().fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)
        frame = pd.DataFrame(candles, columns=['ts'] + value_fields)
        # The 'ts' column holds epoch milliseconds.
        frame['datetime'] = pd.to_datetime(frame['ts'], unit='ms')
        return frame[['datetime'] + value_fields]
    except Exception as e:
        print("ccxt fetch failed:", e)
        return None

df = fetch_binance_ohlcv(limit=HOURS)
_need_synthetic = df is None or len(df) < 200
if _need_synthetic:
    # No usable history: build an hourly random-walk price series that ends
    # one hour before "now", with uniformly distributed volumes.
    periods = HOURS
    end = datetime.utcnow()
    dates = [end - timedelta(hours=hours_back) for hours_back in range(periods, 0, -1)]
    price = 30000.0 + np.random.normal(0, 100, size=periods).cumsum()
    vol = np.random.uniform(50, 500, size=periods)
    df = pd.DataFrame({
        'datetime': dates,
        'open': price,
        'high': price*1.002,
        'low': price*0.998,
        'close': price,
        'volume': vol,
    })

# Working copy that the injection step mutates; every row starts unflagged.
df_sim = df.copy().reset_index(drop=True).assign(is_injected_fraud=0)

# Inject synthetic pump/dump fraud events
if INJECT_FRAUD:
    n = len(df_sim)
    _last_row = n - 1
    _price_cols = ['open','high','low','close']
    min_gap = SEQ_LEN + 12

    # Walk a shuffled list of candidate rows (30 rows of margin on each side)
    # and keep a candidate only if it is more than min_gap rows away from
    # every anchor chosen so far, until NUM_PUMP_EVENTS anchors are found.
    possible_idx = list(range(30, n-30))
    random.shuffle(possible_idx)
    injected = []
    for candidate in possible_idx:
        if len(injected) >= NUM_PUMP_EVENTS:
            break
        too_close = any(abs(candidate - chosen) <= min_gap for chosen in injected)
        if not too_close:
            injected.append(candidate)

    for anchor in injected:
        # The order of the random draws below is kept fixed so a given seed
        # reproduces the same events.
        pump_len = random.choice([1,2,3])
        mag = random.uniform(1.12, 1.6)
        dump_len = pump_len + random.choice([1,2])

        # Pump: scale the candle up by `mag` (all four prices collapse to one value).
        for step in range(pump_len):
            row = min(anchor + step, _last_row)
            df_sim.loc[row, _price_cols] = df_sim.loc[row, 'close'] * mag
            df_sim.loc[row, 'is_injected_fraud'] = 1

        # Dump: scale the unmodified close from `df` down by a random factor.
        for step in range(dump_len):
            row = min(anchor + pump_len + step, _last_row)
            df_sim.loc[row, _price_cols] = df.loc[row, 'close'] * random.uniform(0.6,0.95)
            df_sim.loc[row, 'is_injected_fraud'] = 1

        # With probability 0.4 also inflate volume on the rows around the anchor.
        if random.random() < 0.4:
            for offset in range(-1,3):
                row = min(max(anchor + offset, 0), _last_row)
                df_sim.loc[row, 'volume'] *= random.uniform(3,10)
                df_sim.loc[row, 'is_injected_fraud'] = 1

# Feature engineering
df_feat = df_sim.copy()
_close = df_feat['close']
_volume = df_feat['volume']
bb = ta.volatility.BollingerBands(_close, window=20, window_dev=2)
# Indicator warm-up NaNs are back-filled; returns and the volume deviation
# use 0 where no value is available.
df_feat = df_feat.assign(
    rsi=ta.momentum.rsi(_close, window=14).bfill(),
    ema20=ta.trend.ema_indicator(_close, window=20).bfill(),
    bb_w=(bb.bollinger_hband() - bb.bollinger_lband()).bfill(),
    returns=_close.pct_change().fillna(0),
    vol_norm=(_volume - _volume.rolling(20).mean()).fillna(0),
)
df_feat = df_feat.dropna().reset_index(drop=True)

# Windowed features
WINDOW = SEQ_LEN


def _window_row(end):
    """Summarise the WINDOW rows before position `end`; dt and label come from row `end`."""
    win = df_feat.iloc[end - WINDOW:end]
    return {
        'dt': df_feat['datetime'].iloc[end],
        'close_mean': win['close'].mean(),
        'close_std': win['close'].std(),
        'vol_sum': win['volume'].sum(),
        'ret_mean': win['returns'].mean(),
        'rsi_mean': win['rsi'].mean(),
        'bbw_mean': win['bb_w'].mean(),
        'is_injected_fraud': int(df_feat['is_injected_fraud'].iloc[end])
    }


rows = [_window_row(end) for end in range(WINDOW, len(df_feat))]
df_window = pd.DataFrame(rows).reset_index(drop=True)

_feature_cols = ['close_mean','close_std','vol_sum','ret_mean','rsi_mean','bbw_mean']
X = df_window[_feature_cols].values
scaler = StandardScaler()
Xs = scaler.fit_transform(X)

# Isolation Forest
# decision_function is negated so that larger scores mean "more anomalous".
iso = IsolationForest(contamination=CONTAMINATION, random_state=RANDOM_SEED)
iso.fit(Xs)
scores_if = -iso.decision_function(Xs)

# LSTM Autoencoder
# Each window's scaled feature vector is fed as a length-n_features sequence
# with a single channel, and the network is trained to reproduce it.
_n_windows, _n_features = Xs.shape
X_lstm = Xs.reshape((_n_windows, _n_features, 1))
lstm_model = Sequential()
lstm_model.add(LSTM(32, activation='relu', input_shape=(_n_features, 1)))
lstm_model.add(Dense(_n_features))
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.fit(X_lstm, X_lstm, epochs=20, batch_size=16, verbose=0)
recon = lstm_model.predict(X_lstm)
# Per-window mean squared reconstruction error.
scores_lstm = np.mean((X_lstm[:,:,0] - recon)**2, axis=1)

df_window['score_if'] = scores_if
df_window['score_lstm'] = scores_lstm

# Thresholds
# Two global thresholds (broadcast to every row) plus a rolling one that is
# NaN until ROLLING_WINDOW scores are available.
_if_rolling = pd.Series(scores_if).rolling(ROLLING_WINDOW)
df_window['threshold_percentile_if'] = np.percentile(scores_if, 97.5)
df_window['threshold_zscore_if'] = scores_if.mean() + 2.5*scores_if.std()
df_window['rolling_mean_if'] = _if_rolling.mean()
df_window['rolling_std_if'] = _if_rolling.std()
df_window['threshold_rolling_if'] = df_window['rolling_mean_if'] + 2.5*df_window['rolling_std_if']

# Export the two CSV files the Streamlit app reads.
_price_export_cols = ['datetime','open','high','low','close','volume','is_injected_fraud']
df_feat[_price_export_cols].to_csv("full_sim_prices.csv", index=False)
df_window.to_csv("streamlit_sim_data.csv", index=False)

# Streamlit app code
# The dashboard source is generated from an f-string: {PLAYBACK_SPEED} is
# substituted when this cell runs, while doubled braces are written to app.py
# as literal single braces.
# Fix: the alerts table used to select an 'anomaly_score' column that
# streamlit_sim_data.csv does not contain (this cell writes 'score_if' and
# 'score_lstm'), which raised KeyError once playback finished. It now shows
# the column of the selected model (score_col).
app_code = f"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import time

DATA_WINDOW = "streamlit_sim_data.csv"
PRICE_CSV = "full_sim_prices.csv"

st.set_page_config(layout="wide", page_title="Crypto Fraud Detection Dashboard")
st.title("🚨 Crypto Fraud Detection — Simulation Demo")

col1, col2, col3 = st.columns([1,1,1])
with col1:
    speed = st.slider("Playback speed (seconds per step)", 0.05, 2.0, value={PLAYBACK_SPEED}, step=0.05)
with col2:
    model_choice = st.selectbox("Select Model", ["Isolation Forest", "LSTM"])
with col3:
    threshold_method = st.selectbox("Select Threshold Method", ["Percentile", "Z-score", "Rolling"])

with st.sidebar:
    start = st.button("Start Simulation")

df_win = pd.read_csv(DATA_WINDOW, parse_dates=['dt'])
df_price = pd.read_csv(PRICE_CSV, parse_dates=['datetime'])

if model_choice=="Isolation Forest":
    score_col = 'score_if'
    if threshold_method=="Percentile":
        thr = df_win['threshold_percentile_if'].iloc[0]
    elif threshold_method=="Z-score":
        thr = df_win['threshold_zscore_if'].iloc[0]
    else:
        thr = df_win['threshold_rolling_if']
else:
    score_col = 'score_lstm'
    # LSTM thresholds can be same as IF for simplicity
    thr = np.percentile(df_win[score_col], 97.5) if threshold_method=="Percentile" else df_win[score_col].mean() + 2.5*df_win[score_col].std()

df_win['is_alert_dynamic'] = df_win[score_col] > thr

price_col, info_col = st.columns([2,1])
with price_col:
    st.subheader("Price chart (with model alerts)")
    price_chart = st.empty()
    total = len(df_win)
    cursor = 0
    while True:
        if cursor >= total:
            st.info("Simulation finished — restart to run again.")
            break
        cur_dt = df_win['dt'].iloc[cursor]
        window_start = df_price['datetime'].iloc[max(0, cursor - 24*7)]
        window_end = df_price['datetime'].iloc[min(len(df_price)-1, cursor + 12)]
        view = df_price[(df_price['datetime'] >= window_start) & (df_price['datetime'] <= window_end)]
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=view['datetime'], y=view['close'], mode='lines', name='Price'))
        gt_view = df_price[(df_price['datetime'] >= window_start) & (df_price['datetime'] <= window_end) & (df_price['is_injected_fraud']==1)]
        if len(gt_view) > 0:
            fig.add_trace(go.Scatter(x=gt_view['datetime'], y=gt_view['close'], mode='markers', name='Injected Fraud (GT)'))
        alerts_up_to_now = df_win[(df_win['is_alert_dynamic']==True) & (df_win['dt'] <= cur_dt)]
        if len(alerts_up_to_now) > 0:
            alert_times = alerts_up_to_now['dt'].tolist()
            alert_prices = []
            for at in alert_times:
                nearest = df_price.iloc[(df_price['datetime'] - pd.to_datetime(at)).abs().argsort()[:1]]
                alert_prices.append(nearest['close'].values[0])
            fig.add_trace(go.Scatter(x=alert_times, y=alert_prices, mode='markers', name='Model Alerts'))
        fig.update_layout(height=600, margin={{"r":0,"t":30,"l":0,"b":0}})
        price_chart.plotly_chart(fig, use_container_width=True)
        cursor += 1
        time.sleep(speed)

with info_col:
    st.subheader("Model & Alerts")
    st.write("Model:", model_choice)
    st.write("Threshold method:", threshold_method)
    st.write("Total windows:", len(df_win))
    st.write("Total dynamic alerts:", int(df_win['is_alert_dynamic'].sum()))
    st.subheader("Recent Alerts (table)")
    recent_alerts = df_win[df_win['is_alert_dynamic']==True].sort_values('dt', ascending=False).head(10)
    # Show the score column of the selected model; the CSV has no 'anomaly_score' column.
    st.dataframe(recent_alerts[['dt', score_col, 'is_injected_fraud']].rename(columns={{'dt':'datetime'}}))
"""

# Write the generated dashboard source to disk. The text contains non-ASCII
# characters (emoji, em dash), so the encoding is pinned instead of relying
# on the platform default.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

# Best-effort shutdown of any ngrok process left over from an earlier run.
# A failure here is reported but must not stop the launch.
try:
    ngrok.kill()
except Exception as exc:
    print("ngrok.kill() failed (continuing):", exc)

os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
os.environ["STREAMLIT_BROWSER_GALLERY"] = "false"
os.environ["STREAMLIT_BROWSER_SERVER_ADDRESS"] = "127.0.0.1"

# SECURITY: the ngrok auth token is a credential and must not be committed
# with the notebook. It is read from the NGROK_AUTH_TOKEN environment variable
# instead; the token that was previously hard-coded here should be revoked in
# the ngrok dashboard.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN")
if not NGROK_AUTH_TOKEN:
    raise RuntimeError(
        "NGROK_AUTH_TOKEN is not set; export it (e.g. os.environ['NGROK_AUTH_TOKEN'] = '...') "
        "before running this cell."
    )
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Start Streamlit in the background, then open an ngrok tunnel to port 8501.
get_ipython().system_raw("streamlit run app.py &")
public_url = ngrok.connect(8501).public_url
print("Dashboard URL:", public_url)


  super().__init__(**kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step
Dashboard URL: https://c8ee83c04763.ngrok-free.app


In [None]:
!pip -q install ccxt streamlit pyngrok plotly pandas scikit-learn ta keras tensorflow --quiet

import os, time, random
import pandas as pd, numpy as np
import ccxt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from pyngrok import ngrok
import ta
from datetime import datetime, timedelta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# ------------------- Simulation Parameters -------------------
SYMBOL = "BTC/USDT"  # market symbol passed to ccxt's fetch_ohlcv
TIMEFRAME = "1h"  # candle timeframe passed to ccxt's fetch_ohlcv
HOURS = 24*30  # number of candles requested; also the length of the synthetic fallback series
SEQ_LEN = 24  # rows per rolling feature window; also feeds the minimum gap between injected events
NUM_PUMP_EVENTS = 6  # upper bound on the number of injected pump/dump events
RANDOM_SEED = 42  # seed for random, NumPy and the IsolationForest
INJECT_FRAUD = True  # switch for the synthetic fraud-injection step
CONTAMINATION = 0.02  # value passed as IsolationForest(contamination=...)
PLAYBACK_SPEED = 0.25  # default of the dashboard's "seconds per step" slider

# Seed the Python and NumPy RNGs (TensorFlow is not seeded in this cell).
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# ------------------- Fetch or Generate Data -------------------
def fetch_binance_ohlcv(symbol=SYMBOL, timeframe=TIMEFRAME, limit=1000):
    """Fetch OHLCV candles through ccxt (this version uses the KuCoin client).

    On success returns a DataFrame with columns datetime, open, high, low,
    close, volume. On any exception the error is printed and None is
    returned so the caller can switch to synthetic data.
    """
    try:
        raw_candles = ccxt.kucoin().fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)
        table = pd.DataFrame(raw_candles, columns=['ts','open','high','low','close','volume'])
        # Put the parsed timestamp first, then discard the raw epoch-ms column.
        table.insert(0, 'datetime', pd.to_datetime(table['ts'], unit='ms'))
        return table.drop(columns='ts')
    except Exception as e:
        print("ccxt fetch failed:", e)
        return None

print("Fetching historical data...")
df = fetch_binance_ohlcv(limit=HOURS)
_have_history = df is not None and len(df) >= 200
if not _have_history:
    print("Falling back to synthetic random-walk data (no network or insufficient history).")
    # Hourly timestamps ending one hour before "now", a Gaussian random walk
    # for the price and uniformly distributed volumes.
    periods = HOURS
    end = datetime.utcnow()
    dates = [end - timedelta(hours=hours_back) for hours_back in range(periods, 0, -1)]
    price = 30000.0 + np.random.normal(0,100,size=periods).cumsum()
    vol = np.random.uniform(50,500,size=periods)
    df = pd.DataFrame({
        'datetime': dates,
        'open': price,
        'high': price*1.002,
        'low': price*0.998,
        'close': price,
        'volume': vol,
    })
# Fetched or synthetic, the frame is put in chronological order.
df = df.sort_values('datetime').reset_index(drop=True)

# ------------------- Inject Fraud -------------------
df_sim = df.copy().reset_index(drop=True)
df_sim['is_injected_fraud'] = 0
if INJECT_FRAUD:
    n = len(df_sim)
    min_gap = SEQ_LEN + 12
    _ohlc = ['open','high','low','close']

    def _flag(row):
        """Mark one row of df_sim as belonging to an injected event."""
        df_sim.loc[row,'is_injected_fraud'] = 1

    # Greedy anchor selection over a shuffled candidate list: accept a row
    # only when it lies more than min_gap rows from every accepted anchor.
    possible_idx = list(range(30, n-30))
    random.shuffle(possible_idx)
    injected = []
    for idx in possible_idx:
        if len(injected) >= NUM_PUMP_EVENTS:
            break
        nearest_gap = min((abs(idx - j) for j in injected), default=min_gap + 1)
        if nearest_gap > min_gap:
            injected.append(idx)

    for idx in injected:
        # Keep this draw order: it determines what a given seed produces.
        pump_len = random.choice([1,2,3])
        mag = random.uniform(1.12,1.6)
        dump_len = pump_len + random.choice([1,2])

        # Pump phase: multiply the candle by `mag`.
        for i in (min(idx+k, n-1) for k in range(pump_len)):
            df_sim.loc[i,_ohlc] = df_sim.loc[i,'close']*mag
            _flag(i)

        # Dump phase: scale the original close (from `df`) by a random factor.
        for i in (min(idx+pump_len+k, n-1) for k in range(dump_len)):
            df_sim.loc[i,_ohlc] = df.loc[i,'close']*random.uniform(0.6,0.95)
            _flag(i)

        # Optional volume spike around the anchor (probability 0.4).
        if random.random() < 0.4:
            for k in range(-1,3):
                i = min(max(idx+k,0), n-1)
                df_sim.loc[i,'volume'] *= random.uniform(3,10)
                _flag(i)

print("Injected fraud points:", int(df_sim['is_injected_fraud'].sum()))

# ------------------- Feature Engineering -------------------
df_feat = df_sim.copy()
_px = df_feat['close']
_vol = df_feat['volume']
bb = ta.volatility.BollingerBands(_px,window=20,window_dev=2)
_band_width = bb.bollinger_hband() - bb.bollinger_lband()
_vol_baseline = _vol.rolling(20).mean()

# Warm-up NaNs of the indicators are back-filled; returns and the volume
# deviation fall back to 0 where undefined.
df_feat['rsi'] = ta.momentum.rsi(_px,window=14).bfill()
df_feat['ema20'] = ta.trend.ema_indicator(_px,window=20).bfill()
df_feat['bb_w'] = _band_width.bfill()
df_feat['returns'] = _px.pct_change().fillna(0)
df_feat['vol_norm'] = (_vol - _vol_baseline).fillna(0)
df_feat = df_feat.dropna().reset_index(drop=True)

# ------------------- Windowed Dataset -------------------
WINDOW = SEQ_LEN
rows = []
for end_pos in range(WINDOW,len(df_feat)):
    # Statistics cover the WINDOW rows before end_pos; the timestamp and the
    # ground-truth flag are taken from row end_pos itself.
    win = df_feat.iloc[end_pos-WINDOW:end_pos]
    rows.append(dict(
        dt=df_feat['datetime'].iloc[end_pos],
        close_mean=win['close'].mean(),
        close_std=win['close'].std(),
        vol_sum=win['volume'].sum(),
        ret_mean=win['returns'].mean(),
        rsi_mean=win['rsi'].mean(),
        bbw_mean=win['bb_w'].mean(),
        is_injected_fraud=int(df_feat['is_injected_fraud'].iloc[end_pos]),
    ))
df_window = pd.DataFrame(rows).reset_index(drop=True)

# Standardise the window features before fitting the anomaly model.
_model_features = ['close_mean','close_std','vol_sum','ret_mean','rsi_mean','bbw_mean']
X = df_window[_model_features].values
scaler = StandardScaler()
Xs = scaler.fit_transform(X)

# ------------------- Isolation Forest -------------------
iso = IsolationForest(contamination=CONTAMINATION, random_state=RANDOM_SEED)
iso.fit(Xs)
# Negate decision_function so that a larger score means "more anomalous".
scores_iso = -iso.decision_function(Xs)
df_window['score_iso'] = scores_iso

# ------------------- Robust Threshold (Median + 3*MAD) -------------------
median = np.median(scores_iso)
_abs_deviation = np.abs(scores_iso - median)
mad = np.median(_abs_deviation)
robust_thr = median + 3*mad

# ------------------- LSTM Model -------------------
SEQ_INPUT = 24
# Each sample is the previous SEQ_INPUT rows of (close, volume); the target
# is the close of the row that follows them.
_inputs = df_feat[['close','volume']]
_target_positions = range(SEQ_INPUT, len(df_feat))
X_lstm = np.array([_inputs.iloc[pos-SEQ_INPUT:pos].values for pos in _target_positions])
y_lstm = np.array([df_feat['close'].iloc[pos] for pos in _target_positions])

lstm_model = Sequential([
    LSTM(32, input_shape=(SEQ_INPUT,2)),
    Dense(1),
])
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.fit(X_lstm, y_lstm, epochs=5, batch_size=16, verbose=0)

# The absolute prediction error on each sample is used as the LSTM anomaly score.
pred_lstm = lstm_model.predict(X_lstm, verbose=0)
lstm_error = np.abs(pred_lstm.ravel() - y_lstm)
df_window['score_lstm'] = lstm_error

# ------------------- Static Threshold -------------------
# 97.5th percentile of the Isolation Forest scores.
thr_static = np.percentile(scores_iso, 97.5)

# ------------------- Save Data -------------------
# Two CSV files are handed to the Streamlit app: the price series with the
# ground-truth flag, and the per-window scores.
_price_export_cols = ['datetime','open','high','low','close','volume','is_injected_fraud']
df_feat.loc[:, _price_export_cols].to_csv("full_sim_prices.csv", index=False)
df_window.to_csv("streamlit_sim_data.csv", index=False)

# ------------------- Streamlit App -------------------
# The dashboard source is generated from an f-string: {PLAYBACK_SPEED} is
# substituted when this cell runs.
# Fix: the "Select Model" control used to have no effect, because thresholds
# and alerts were always computed from 'score_iso' while 'anomaly_score' was
# assigned but never read. The score of the selected model is now chosen first
# and every threshold / alert is derived from it. With "Isolation Forest"
# selected the behaviour is unchanged.
app_code = f"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import time

st.set_page_config(layout="wide", page_title="Crypto Fraud Detection Dashboard")
st.title("🚨 Crypto Fraud Detection — Simulation Demo")

# Controls
col1, col2, col3, col4 = st.columns([1,1,1,1])
with col1:
    model_sel = st.selectbox("Select Model", ["Isolation Forest","LSTM"])
with col2:
    threshold_sel = st.selectbox("Select Threshold", ["Static (97.5%)","Dynamic","Robust"])
with col3:
    speed = st.slider("Playback speed (seconds per step)",0.05,2.0,value={PLAYBACK_SPEED},step=0.05)
with col4:
    start = st.button("Start Simulation")

# Load Data
df_win = pd.read_csv("streamlit_sim_data.csv", parse_dates=['dt'])
df_price = pd.read_csv("full_sim_prices.csv", parse_dates=['datetime'])

# Select Score Based on Model
if model_sel=="Isolation Forest":
    df_win['anomaly_score'] = df_win['score_iso']
else:
    df_win['anomaly_score'] = df_win['score_lstm']

# Set Thresholds (all derived from the selected model's score)
score = df_win['anomaly_score']
thr_static = score.quantile(0.975)
thr_robust = score.median() + 3*np.median(np.abs(score - score.median()))
sensitivity = 97
thr_dynamic = np.percentile(score,sensitivity)

if threshold_sel=="Static (97.5%)":
    df_win['is_alert'] = score > thr_static
elif threshold_sel=="Dynamic":
    df_win['is_alert'] = score > thr_dynamic
else:
    df_win['is_alert'] = score > thr_robust

price_col, table_col, info_col = st.columns([2,1,1])
price_chart = price_col.empty()
transaction_table = table_col.empty()

total = len(df_win)
cursor = 0

while True:
    if start:
        if cursor >= total:
            st.info("Simulation finished — restart to run again.")
            break

        cur_dt = df_win['dt'].iloc[cursor]

        # --- Price Chart ---
        window_start = df_price['datetime'].iloc[max(0, cursor-24*7)]
        window_end = df_price['datetime'].iloc[min(len(df_price)-1, cursor+12)]
        view = df_price[(df_price['datetime'] >= window_start) & (df_price['datetime'] <= window_end)]
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=view['datetime'], y=view['close'], mode='lines', name='Price'))

        alerts_up_to_now = df_win[(df_win['is_alert']==True) & (df_win['dt'] <= cur_dt)]
        if len(alerts_up_to_now)>0:
            alert_times = alerts_up_to_now['dt'].tolist()
            alert_prices = []
            for at in alert_times:
                nearest = df_price.iloc[(df_price['datetime'] - pd.to_datetime(at)).abs().argsort()[:1]]
                alert_prices.append(nearest['close'].values[0])
            fig.add_trace(go.Scatter(x=alert_times, y=alert_prices, mode='markers', name='Model Alerts', marker=dict(color='red', size=8)))

        price_chart.plotly_chart(fig, use_container_width=True)

        # --- Live Transaction Table ---
        recent_transactions = df_price[df_price['datetime'] <= cur_dt].tail(20)
        transaction_table.dataframe(recent_transactions[['datetime','open','high','low','close','volume']])

        cursor += 1
        time.sleep(speed)
    else:
        break
"""

# Write the generated dashboard source to disk. The text contains non-ASCII
# characters (emoji, em dash), so the encoding is pinned instead of relying
# on the platform default.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

# ------------------- Run Streamlit with Ngrok -------------------
# Best-effort shutdown of any ngrok process left over from an earlier run.
# A failure here is reported but must not stop the launch.
try:
    ngrok.kill()
except Exception as exc:
    print("ngrok.kill() failed (continuing):", exc)

os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"

# SECURITY: the ngrok auth token is a credential and must not be committed
# with the notebook. It is read from the NGROK_AUTH_TOKEN environment variable
# instead; the token that was previously hard-coded here should be revoked in
# the ngrok dashboard.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN")
if not NGROK_AUTH_TOKEN:
    raise RuntimeError(
        "NGROK_AUTH_TOKEN is not set; export it (e.g. os.environ['NGROK_AUTH_TOKEN'] = '...') "
        "before running this cell."
    )
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Start Streamlit in the background, then open an ngrok tunnel to port 8501.
get_ipython().system_raw("streamlit run app.py &")
public_url = ngrok.connect(8501).public_url
print("Dashboard URL:", public_url)


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/130.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m122.9/130.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.7/5.7 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m53.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m641.0/641.0 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for ta (setup.py) ... [?25l[?25hdone
Fetching historical data...
In

  super().__init__(**kwargs)


Dashboard URL: https://823a0a4ac687.ngrok-free.app


In [None]:
# === CELL 0: Notes & quick config ===
# Paste this notebook into Colab and run the cells from top to bottom.
# The constants below are only defined here; the code that consumes them is
# in later cells, so the descriptions reflect the intent stated by the author.
# Configure below:
USE_BIGQUERY = False      # True -> fetch Ethereum transactions from BigQuery (requires a service account JSON)
ELLIPTIC_FALLBACK = True  # True -> download the Elliptic dataset when BigQuery is not used
SAMPLE_STRATEGY = "top_n" # one of "top_n", "neighborhood", "random"
N_TOP = 50000             # sample size for the "top_n" strategy
NEIGHBORHOOD_SEED_WALLETS = []  # known wallets to start from when using the "neighborhood" strategy
RANDOM_SEED = 42          # presumably the RNG seed used by later cells; verify where it is consumed
# Model/train tuning (demo values; change for real runs)
GNN_EPOCHS = 8            # presumably training epochs for the GNN model; confirm against the training cell
HYBRID_EPOCHS = 8         # presumably training epochs for the hybrid model; confirm against the training cell
TEMP_EPOCHS = 6           # presumably training epochs for the temporal model; confirm against the training cell
BATCH_SIZE = 2048         # presumably the training batch size; confirm against the training cell

# If using BigQuery, upload the service account JSON and set its path here:
BQ_SERVICE_ACCOUNT_JSON = "service_account.json"  # upload to Colab via files.upload() if needed

# For speed in the demo: reduce sizes
MAX_EDGES_FOR_DEMO = 200000  # adjust to limit memory


In [None]:
# === CELL 1: Basic installs (PyTorch, PyG, helpers)
# This cell tries to install packages robustly in Colab. It may take a few minutes.
# If wheel-based PyG install fails, re-run with correct torch/cuda versions per https://pytorch-geometric.readthedocs.io/
import sys, subprocess, math, os

def _pip_install(*args):
    """Run ``python -m pip install <args>`` with pip's stdout suppressed.

    Uses the interpreter that runs this notebook (sys.executable). Raises
    subprocess.CalledProcessError when pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", *args], stdout=subprocess.DEVNULL)


# Upgrade pip
_pip_install("--upgrade", "pip", "wheel")

# Basic libs
_pip_install("-q", "numpy", "pandas", "matplotlib", "scikit-learn", "networkx", "pyvis",
             "plotly", "tqdm", "google-cloud-bigquery", "kaggle")

# Try to rely on existing torch in Colab; if missing, install the default torch build
try:
    import torch
    print("Found torch", torch.__version__)
except Exception as exc:
    print("Torch not found, installing cpu torch...")
    # Report why the import failed instead of discarding the reason.
    print("  import error:", exc)
    _pip_install("-q", "torch", "torchvision", "torchaudio")
    import torch

# Build the wheel-index tag from the installed torch: the version without its
# local "+..." suffix, and "cpu" or "cu<digits>" depending on torch.version.cuda.
TORCH = torch.__version__.split('+')[0]
CUDA = 'cpu' if torch.version.cuda is None else 'cu' + torch.version.cuda.replace('.', '')
print("Torch:", TORCH, "CUDA tag:", CUDA)

# Attempt to install matching PyG wheels from data.pyg.org
PYG_INDEX = f"https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html"
print("Attempting to install PyG from:", PYG_INDEX)
try:
    _pip_install("-q", "--find-links", PYG_INDEX,
                 "torch_scatter", "torch_sparse", "torch_cluster", "torch_spline_conv")
    _pip_install("-q", "torch_geometric")
    print("Installed PyG dependencies & torch_geometric.")
except (subprocess.CalledProcessError, OSError) as e:
    print("PyG wheel install may have failed (often depending on CUDA). Trying fallback 'pip install torch_geometric' ...")
    # Surface the failure reason so a broken wheel index can be diagnosed.
    print("  reason:", e)
    _pip_install("-q", "torch_geometric")

# Install torch_geometric_temporal optionally
_pip_install("-q", "torch_geometric_temporal")
# Install other ML libs
_pip_install("-q", "pyyaml", "protobuf")

print("Install step finished. Please restart runtime if imports fail.")


Found torch 2.8.0+cu126
Torch: 2.8.0 CUDA tag: cu126
Attempting to install PyG from: https://data.pyg.org/whl/torch-2.8.0+cu126.html
Installed PyG dependencies & torch_geometric.
Install step finished. Please restart runtime if imports fail.
