# MT5 Report Analysis

This notebook parses the MT5 strategy tester report (HTML) and performs a streak analysis.

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import json
import matplotlib.pyplot as plt
import os

%matplotlib inline

## 1. Parse HTML Report

We extract the 'Deals' table which contains the profit information.

In [None]:
# File path
file_path = "../data/TicksReales.NASDAQ_con100MM_conRiesgoSimetrico25Percent__ReportTester-3000086403.html"
json_output_path = "../data/parsed_trades.json"

# Read Content
try:
    with open(file_path, "r", encoding="utf-16") as f:
        content = f.read()
except UnicodeError:
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

soup = BeautifulSoup(content, "lxml") # using lxml for speed if available, else html.parser

# Find Tables
tables = soup.find_all("table")
print(f"Found {len(tables)} tables")

# Locate Deals Table
deals_data = []
if len(tables) > 1:
    # Usually the second table or the one with 'Profit' header
    main_table = tables[1]
    rows = main_table.find_all("tr")
    
    header_index = -1
    headers = []
    
    # Search for header
    for i, row in enumerate(rows):
        cells = [c.get_text(strip=True) for c in row.find_all(["th", "td"])]
        if "Profit" in cells and "Time" in cells:
            header_index = i
            headers = cells
            print(f"Header found at row {i}: {headers}")
            break
            
    # Extract Data
    if header_index != -1:
        for row in rows[header_index+1:]:
            cells = [c.get_text(strip=True) for c in row.find_all("td")]
            if len(cells) != len(headers):
                continue
            
            record = dict(zip(headers, cells))
            deals_data.append(record)
            
print(f"Extracted {len(deals_data)} deals")

# Normalize Profit
# MT5 numbers might have spaces or weird formats
def clean_number(s):
    if not s: return 0.0
    s = s.replace(" ", "").replace(",", ".") # Handle potential european format if strictly needed, but MT5 reports usually use dots or spaces
    # Sometimes negatives are weird, but usually standard -100.00
    try:
        return float(s)
    except:
        return 0.0

for d in deals_data:
    d['Profit_Clean'] = clean_number(d.get('Profit', '0'))

# Save to JSON
with open(json_output_path, "w", encoding="utf-8") as f:
    json.dump(deals_data, f, indent=2, ensure_ascii=False)
    
print(f"Saved to {json_output_path}")

Found 2 tables
Header found at row 1767: ['Time', 'Deal', 'Symbol', 'Type', 'Direction', 'Volume', 'Price', 'Order', 'Commission', 'Swap', 'Profit', 'Balance', 'Comment']
Extracted 1763 deals


FileNotFoundError: [Errno 2] No such file or directory: 'data/parsed_trades.json'

## 2. Streak Analysis

We define a 'streak' as consecutive trades with the same result (Positive or Negative).

In [None]:
df = pd.DataFrame(deals_data)

# Filter for 'out' deals (realized profit)
trades = df[df['Direction'] == 'out'].copy()
print(f"Trades with P/L: {len(trades)}")

# Determine Win/Loss
trades['Result'] = trades['Profit_Clean'].apply(lambda x: 'Win' if x > 0 else 'Loss')

# Calculate Streaks
# We group by consecutive identical results
streaks = []
current_streak_type = None
current_streak_count = 0

for res in trades['Result']:
    if res == current_streak_type:
        current_streak_count += 1
    else:
        if current_streak_type is not None:
            streaks.append({'Type': current_streak_type, 'Count': current_streak_count})
        current_streak_type = res
        current_streak_count = 1
        
# Append last streak
if current_streak_type is not None:
    streaks.append({'Type': current_streak_type, 'Count': current_streak_count})
    
streak_df = pd.DataFrame(streaks)
print(streak_df.head())

## 3. Visualization

Histogram of streak lengths.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

if not streak_df.empty:
    # Original Histogram Plot
    plt.figure(figsize=(12, 6))
    
    win_streaks = streak_df[streak_df['Type'] == 'Win']['Count']
    loss_streaks = streak_df[streak_df['Type'] == 'Loss']['Count']
    
    max_streak_len = streak_df['Count'].max()
    bins_range = range(1, max(max_streak_len, 5) + 2)

    plt.hist([win_streaks, loss_streaks], label=['Winning Streaks', 'Losing Streaks'], 
             color=['green', 'red'], alpha=0.7, bins=bins_range, align='left')
    
    plt.title('Distribution of Winning and Losing Streaks')
    plt.xlabel('Streak Length')
    plt.ylabel('Frequency')
    plt.legend()
    plt.xticks(range(1, max(max_streak_len, 5) + 1))
    plt.grid(axis='y', alpha=0.3)
    plt.show()

    # Pyramid Chart
    plt.figure(figsize=(12, 8))
    # Calculate frequency of each streak length
    win_streak_counts = streak_df[streak_df['Type'] == 'Win']['Count'].value_counts().sort_index()
    loss_streak_counts = streak_df[streak_df['Type'] == 'Loss']['Count'].value_counts().sort_index()

    # Get all unique streak lengths for consistent plotting
    all_streak_lengths = sorted(list(set(win_streak_counts.index).union(set(loss_streak_counts.index))))
    
    # Prepare data for plotting (negative frequencies for losses)
    win_freq = [win_streak_counts.get(length, 0) for length in all_streak_lengths]
    loss_freq = [-loss_streak_counts.get(length, 0) for length in all_streak_lengths]

    # Create horizontal bar chart
    plt.barh(all_streak_lengths, win_freq, color='green', label='Winning Streaks')
    plt.barh(all_streak_lengths, loss_freq, color='red', label='Losing Streaks')

    # Add frequency numbers inside the bars
    for i, length in enumerate(all_streak_lengths):
        if win_freq[i] > 0:
            plt.text(win_freq[i], length, str(win_freq[i]), va='center', ha='left', color='black', fontsize=9)
        if loss_freq[i] < 0:
            plt.text(loss_freq[i], length, str(abs(loss_freq[i])), va='center', ha='right', color='black', fontsize=9)

    plt.title('Distribution of Winning and Losing Streaks (Pyramid Style)')
    plt.xlabel('Frequency')
    plt.ylabel('Streak Length')
    plt.yticks(all_streak_lengths) # Ensure all streak lengths are explicitly shown
    plt.axvline(0, color='black', linewidth=0.8) # Add a vertical line at 0
    plt.grid(axis='x', alpha=0.3)
    plt.legend()
    plt.show()

else:
    print("No trades found to analyze.")