**Find Trends in the data for modeling forward pass**

Similar to `experiments/step_time_analysis.ipynb` we are using current batch data we gathered in profiling to estimate forward pass times with a model

In [2]:
# get data from csv into df
import pandas as pd
import json

def parse_csv(path: str) -> pd.DataFrame:
    """
    Read a CSV file and return a DataFrame.
    
    Args:
        path (str): The path to the CSV file.
        
    Returns:
        pd.DataFrame: The DataFrame containing the data from the CSV file.
    """
    df = pd.read_csv(path)
    return df

def format_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    parse the num_scheduled_tokens column from string to json to parse info on decodes and prefills
    """
    df['num_scheduled_tokens'] = df['num_scheduled_tokens'].apply(lambda x: x.replace("'", '"'))
    df['num_scheduled_tokens'] = df['num_scheduled_tokens'].apply(json.loads)

    def process_tokens(token_dict):
        """
        Process token dictionary to extract decode/prefill statistics
        """
        num_decodes = 0
        num_prefills = 0
        sum_decode_tokens = 0
        sum_prefill_tokens = 0
        
        for key, value in token_dict.items():
            if value == 1:
                # Decode
                num_decodes += 1
                sum_decode_tokens += value
            else:
                # Prefill (anything > 1)
                num_prefills += 1
                sum_prefill_tokens += value
        
        return pd.Series({
            'num_decodes': num_decodes,
            'num_prefills': num_prefills,
            'sum_decode_tokens': sum_decode_tokens,
            'sum_prefill_tokens': sum_prefill_tokens
        })

    df[['num_decodes', 'num_prefills', 'sum_decode_tokens', 'sum_prefill_tokens']] = df['num_scheduled_tokens'].apply(process_tokens)
    df.drop(columns=['num_scheduled_tokens', 'num_decodes', 'distribution', 'dataset', 'model','scheduled_new_reqs','scheduler_time', 'update_time', 'block_size', 'gpu_memory_utilization', 'num_gpu_blocks', 'enable_prefix_caching', 'max_num_sequences', 'max_model_len', 'temperature'], inplace=True) # Num_decodes is the same sum_decode_tokens, so we can drop it

    # add num_total_requests column = num_decodes + num_prefills
    df['num_total_requests'] = df['num_prefills'] + df['sum_decode_tokens']
    return df

In [3]:
path1 = 'execution_stats_50_0.0_Qwen-Qwen2.5-0.5B_sharegpt.csv'
path2 = 'execution_stats_100_0.0_Qwen-Qwen2.5-0.5B_sharegpt.csv'
path3 = 'execution_stats_250_0.0_Qwen-Qwen2.5-0.5B_sharegpt.csv'
path4 = 'execution_stats_500_0.0_Qwen-Qwen2.5-0.5B_sharegpt.csv'
path5 = 'execution_stats_1000_0.0_Qwen-Qwen2.5-0.5B_sharegpt.csv'

# parse and concat all dataframes
df1 = parse_csv(path1)
df2 = parse_csv(path2)
df3 = parse_csv(path3)
df4 = parse_csv(path4)
df5 = parse_csv(path5)
df = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)
df = format_data(df)
df.head()

Unnamed: 0,num_total_scheduled_tokens,execute_time,arrival_rate,num_prefills,sum_decode_tokens,sum_prefill_tokens,num_total_requests
0,1,0.005617,50,0,1,0,1
1,1,0.005714,50,0,1,0,1
2,1,0.005747,50,0,1,0,1
3,1,0.005662,50,0,1,0,1
4,1,0.00566,50,0,1,0,1


In [4]:
# try to find correlation between execute_time and num_decodes, num_prefills, sum_decode_tokens, sum_prefill_tokens
correlation = df[['execute_time', 'num_prefills', 'sum_decode_tokens', 'sum_prefill_tokens', 'arrival_rate']].corr()
print(correlation)

                    execute_time  num_prefills  sum_decode_tokens  \
execute_time            1.000000      0.733713           0.780243   
num_prefills            0.733713      1.000000           0.424719   
sum_decode_tokens       0.780243      0.424719           1.000000   
sum_prefill_tokens      0.819231      0.804882           0.390751   
arrival_rate           -0.004300      0.011001           0.001811   

                    sum_prefill_tokens  arrival_rate  
execute_time                  0.819231     -0.004300  
num_prefills                  0.804882      0.011001  
sum_decode_tokens             0.390751      0.001811  
sum_prefill_tokens            1.000000      0.010560  
arrival_rate                  0.010560      1.000000  


In [11]:
# plot execute_time vs num_decodes
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.scatter(df['execute_time'], df['sum_decode_tokens'], alpha=0.5, label='Num Decodes')
# plt.scatter(df['execute_time'], df['sum_prefill_tokens'], alpha=0.5, label='Num Total Reqs', color='orange')
# plt.scatter(df['execute_time'], df['sum_prefill_tokens'], alpha=0.5, label='Sum Prefill Tokens', color='green')
plt.xlabel('Execute Time')
plt.ylabel('Number of Tokens')
plt.title('Execute Time vs Number of Tokens')
plt.legend()
plt.show()

ValueError: Key backend: 'module://matplotlib_inline.backend_inline' is not a valid value for backend; supported values are ['gtk3agg', 'gtk3cairo', 'gtk4agg', 'gtk4cairo', 'macosx', 'nbagg', 'notebook', 'qtagg', 'qtcairo', 'qt5agg', 'qt5cairo', 'tkagg', 'tkcairo', 'webagg', 'wx', 'wxagg', 'wxcairo', 'agg', 'cairo', 'pdf', 'pgf', 'ps', 'svg', 'template']

In [10]:
# train a linear regression model to predict execute_time based on num_prefills, sum_decode_tokens, sum_prefill_tokens, and arrival_rate
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
X = df[['sum_decode_tokens', 'sum_prefill_tokens']]
y = df['execute_time']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
# print the coefficients
print("Coefficients:", model.coef_)
# print the intercept
print("Intercept:", model.intercept_)
# print the score of the model
print("Score:", model.score(X_test, y_test))
# make predictions on the test set
y_pred = model.predict(X_test)
# plot the predictions vs the actual values
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.5)
plt.xlabel('Actual Execute Time')
plt.ylabel('Predicted Execute Time')
plt.title('Actual vs Predicted Execute Time')
plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=2)
plt.show()

Coefficients: [7.35301969e-05 2.42771696e-05]
Intercept: 0.004500796222933236
Score: 0.9228033716114614


NameError: name 'plt' is not defined