<a href="https://colab.research.google.com/github/aisha-sk/ml-data-thing/blob/main/mlcachedata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# --- Synthetic dataset -------------------------------------------------------
# Seed NumPy's legacy global RNG so every draw below is reproducible.
np.random.seed(42)

num_samples = 1000

# (low, high) bounds for the uniformly distributed features. The dict order is
# also the draw order, which must not change if the seeded values are to stay
# the same.
uniform_bounds = {
    "load_store_ratio": (0.1, 1.0),
    "cache_miss_rate_L1": (0.01, 0.5),
    "cache_miss_rate_L2": (0.01, 0.4),
    "cache_miss_rate_L3": (0.01, 0.3),
}

data = {
    feature: np.random.uniform(low, high, size=num_samples)
    for feature, (low, high) in uniform_bounds.items()
}
# Binary flag drawn from {0, 1}.
data["memory_access_pattern"] = np.random.choice([0, 1], size=num_samples)
# Integers in [32, 512) -- the upper bound is exclusive for randint.
data["previous_fetch_size"] = np.random.randint(32, 512, size=num_samples)

# Target: a fixed linear combination of the features, accumulated term by term
# in a fixed order so floating-point results are stable. Note that
# "previous_fetch_size" does not enter the formula.
fetch_target = 64 + data["load_store_ratio"] * 128
fetch_target = fetch_target - data["cache_miss_rate_L1"] * 50
fetch_target = fetch_target - data["cache_miss_rate_L2"] * 30
fetch_target = fetch_target - data["cache_miss_rate_L3"] * 20
fetch_target = fetch_target + data["memory_access_pattern"] * 40
# astype(int) drops the fractional part (truncation, not rounding).
data["optimal_fetch_size"] = fetch_target.astype(int)

df = pd.DataFrame(data)

# --- Train and evaluate ------------------------------------------------------
# Split the frame into the regression target and the predictor columns.
y = df["optimal_fetch_size"]
X = df.drop(columns=["optimal_fetch_size"])

# Hold out 20% of the rows for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Seeded 900-tree random forest; fit() returns the estimator itself, so the
# construction and training can be chained.
model = RandomForestRegressor(n_estimators=900, random_state=42).fit(X_train, y_train)

# Score the held-out rows with mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print(f"Model MSE: {mse}")


Model MSE: 10.315279000000006


In [32]:
# One hand-written observation, with the same feature columns (and order) as
# the frame the model was trained on in the previous cell.
sample_features = {
    "load_store_ratio": 0.5,
    "cache_miss_rate_L1": 0.2,
    "cache_miss_rate_L2": 0.15,
    "cache_miss_rate_L3": 0.1,
    "memory_access_pattern": 1,
    "previous_fetch_size": 256,
}
new_data = pd.DataFrame([sample_features])

# predict() returns one value per input row; only a single row is supplied.
predicted_fetch_size = model.predict(new_data)
first_prediction = predicted_fetch_size[0]
# NOTE(review): the "KB" unit is asserted only by this message -- nothing in
# the notebook establishes the unit of the target column; confirm.
print(f"Predicted Fetch Size: {first_prediction} KB")


Predicted Fetch Size: 149.57 KB
