# 📈 Backtesting Strategy using Yahoo Finance Data
This notebook loads stock ranking signals, fetches real stock prices from Yahoo Finance, 
processes the signals into a long/short portfolio, and backtests the strategy.

In [None]:
!pip install yfinance pandas numpy matplotlib

## 📌 Step 1: Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

## 📌 Step 2: Load and Prepare Data

In [None]:
# Read the trend-ranking excerpt, keep only the 'Aggregate' source rows, and
# reshape from long (one row per date/ticker) to wide (dates x tickers).
df = (
    pd.read_csv('https://storage.googleapis.com/antedata_open/AllDat_excerpt_ticker.csv')
    .loc[lambda frame: frame['source'] == 'Aggregate', ['date', 'ticker', 'TrendRank']]
    .pivot(index='date', columns='ticker', values='TrendRank')
)

# Parse the date index and push every signal date forward by 4 calendar days
# (intended to land the signals on Fridays).
df.index = pd.to_datetime(df.index) + pd.DateOffset(days=4)

# Preview the wide signal table
df.head()

## 📌 Step 3: Retrieve Stock Prices from Yahoo Finance

In [None]:
# Candidate tickers are the columns of the wide signal table.
tickers = df.columns.tolist()

# Validate tickers by probing Yahoo Finance for one day of history.
# Tickers that return no rows, or whose lookup raises, are skipped but recorded
# so the reader can see what was dropped from the universe.
valid_tickers = []
skipped_tickers = []
for ticker in tickers:
    try:
        test_data = yf.Ticker(ticker).history(period='1d')
    except Exception:
        # Catch only ordinary errors: a bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit and make the loop impossible to interrupt.
        skipped_tickers.append(ticker)
        continue
    if test_data.empty:
        skipped_tickers.append(ticker)
    else:
        valid_tickers.append(ticker)

if not valid_tickers:
    raise ValueError('No valid tickers found on Yahoo Finance!')

print(f'Valid tickers found: {valid_tickers}')
if skipped_tickers:
    print(f'Skipped tickers (no data or lookup error): {skipped_tickers}')

# Fetch historical prices over the signal date range.
# yfinance treats `end` as exclusive, so extend it by one day to make sure the
# last signal date itself is included in the download.
price_start = df.index.min().strftime('%Y-%m-%d')
price_end = (df.index.max() + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
raw_prices = yf.download(valid_tickers, start=price_start, end=price_end)

# Print raw Yahoo Finance data structure
print('Raw Yahoo Finance Data Structure:')
print(raw_prices.head())

## 📌 Step 4: Process Stock Prices

In [None]:
# Top-level column labels of the download (the price fields).
# `get_level_values(0)` works for both MultiIndex columns (field, ticker) and a
# flat Index, whereas `.columns.levels` only exists on a MultiIndex.
price_fields = raw_prices.columns.get_level_values(0)

# Prefer 'Adj Close'; fall back to 'Close' when it is not present.
if 'Adj Close' in price_fields:
    prices = raw_prices['Adj Close']
elif 'Close' in price_fields:
    prices = raw_prices['Close']
else:
    raise KeyError("Neither 'Adj Close' nor 'Close' found in Yahoo Finance data!")

# With flat columns the selection above is a Series; promote it to a
# one-column frame so the column-wise steps below (and the later column
# intersection with the signals) keep working.
if isinstance(prices, pd.Series):
    prices = prices.to_frame(name=valid_tickers[0])

# Forward-fill missing values
prices = prices.ffill()

# Compute daily percentage change. Gaps were already forward-filled explicitly,
# so disable pct_change's own (deprecated) implicit fill.
returns = prices.pct_change(fill_method=None)

# Drop tickers with excessive missing values: keep a column only if at least
# 90% of its rows are non-missing.
returns = returns.dropna(axis=1, thresh=int(0.9 * len(returns)))

# Display processed returns
returns.head()

## 📌 Step 5: Backtesting the Strategy

In [None]:
# Strategy parameters
TAIL_FRACTION = 0.2     # bottom/top share of the rank range that forms each leg
POSITION_WEIGHT = 0.03  # absolute weight per position (long = +, short = -)

# Cross-sectional rank of every ticker on each signal date (1 = lowest TrendRank).
# Missing signals get a NaN rank. Thresholds use the full column count of the
# signal table, i.e. the universe before it is matched against available prices.
ranks = df.rank(axis=1, method='first')
universe_size = ranks.shape[1]

# Decide leg membership from the ranks once, up front. Recoding a single frame
# in place mixes ranks and weights, and with 5 or fewer tickers leaves rank 1
# untouched as a literal weight of 1.0.
short_leg = ranks < (TAIL_FRACTION * universe_size)
long_leg = ranks > ((1 - TAIL_FRACTION) * universe_size)

# Create long/short portfolio of best/worst ranked stocks: zero weight by
# default, -/+ POSITION_WEIGHT in the tails, NaN where there was no signal.
SIG = pd.DataFrame(0.0, index=ranks.index, columns=ranks.columns)
SIG[short_leg] = -POSITION_WEIGHT
SIG[long_leg] = POSITION_WEIGHT
SIG = SIG.where(ranks.notna())

# Harmonize columns of Signal and Returns
common_cols = SIG.columns.intersection(returns.columns)
SIG = SIG[common_cols]
returns = returns[common_cols]

# Ensure signals align with returns: each return date takes the most recent
# signal row at or before it.
SIG = SIG.reindex(returns.index, method='ffill')

# Calculate backtest returns; shift(1) applies each day's weights to the
# following day's return so the position is never traded on same-day data.
OUT = SIG.shift(1).multiply(returns, axis=0)

# Remove first 3 rows
OUT = OUT.iloc[3:]

# Display first few rows of output
OUT.head()

## 📌 Step 6: Visualizing the Strategy Performance

In [None]:
# Sum the per-ticker contributions into a daily strategy return, then
# accumulate over time (simple, non-compounded running sum).
cumulative_returns = OUT.sum(axis=1).cumsum()

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(cumulative_returns, label='Strategy Cumulative Returns')
# Zero line as a break-even reference
ax.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
ax.set_title('Backtest Returns with Yahoo Finance Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Cumulative Returns')
ax.legend()
plt.show()