# Module 4: Applying 1st and Higher Order Markov Chains
## LA Lakers Game Stats

Name: 
Date:


In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import random

### functions you'll need. 

In [2]:
# function to pick between two states, given their mutual probabilities

def pick_state(var_a, prob_a, var_b, prob_b):

    # Validate input probabilities
    if not (0 <= prob_a <= 1 and 0 <= prob_b <= 1):
        raise ValueError("Input probabilities must be between 0 and 1")

    # Calculate the cumulative probabilities
    cum_prob_a = prob_a
    cum_prob_b = prob_a + prob_b

    # Generate a random number between 0 and 1
    rand = random.random()
    
    print(cum_prob_a, cum_prob_b, rand)
    
    # Determine which character to pick based on the random number
    if rand < cum_prob_a:
        return var_a
    elif rand <= cum_prob_b:
        return var_b
    else:
        raise Exception("Unexpected error: random number outside [0, 1]")
        

In [3]:
# test function with sample input values. Run this cell multiple times, you should see both A and B appearing

pick_state('A', 0.3, 'B', 0.7)

0.3 1.0 0.7990777681458257


'B'

In [4]:
# function to get all sequences (even overlapping) of length n in a given string named 'sequence'

def get_n_letter_sequences(sequence, n):
    
    return [sequence[i:i+n] for i in range(len(sequence) - n + 1)]


In [5]:
# test function with sample string

get_n_letter_sequences('ABBBBABABA', 3)

['ABB', 'BBB', 'BBB', 'BBA', 'BAB', 'ABA', 'BAB', 'ABA']

## read data into dataframe

In [6]:
df_LAL = pd.read_csv('LALakers_21-23-1-1.csv')
df_LAL

Unnamed: 0.1,Unnamed: 0,date,result,season
0,0,2021-10-19,L,21-22
1,1,2021-10-22,L,21-22
2,2,2021-10-24,W,21-22
3,3,2021-10-26,W,21-22
4,4,2021-10-27,L,21-22
...,...,...,...,...
159,159,2023-04-02,,22-23
160,160,2023-04-04,,22-23
161,161,2023-04-05,,22-23
162,162,2023-04-07,,22-23


## 1st order Markov Chain

### training and test data

In [7]:
# train only on 2021-22 first half

seq_train = ''.join(df_LAL.loc[df_LAL.season=='21-22'][:40]['result'].values)
seq_train

'LLWWLWWWLLWWLWLLLWLWLWWLWLWWWLLLLLWLWWWW'

In [8]:
# get test data - second half of 21-22 season

seq_test = df_LAL.loc[df_LAL.season=='21-22'][40:]['result'].values
seq_test

array(['L', 'L', 'L', 'W', 'L', 'W', 'L', 'W', 'L', 'L', 'L', 'W', 'L',
       'W', 'L', 'L', 'L', 'W', 'L', 'L', 'L', 'L', 'W', 'L', 'L', 'W',
       'L', 'L', 'L', 'W', 'L', 'W', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
       'L', 'W', 'W'], dtype=object)

### generate transition matrix of order-1 Markov

In [9]:

# get all sequences of length 1 in the training data
seq_1_list        = np.array(get_n_letter_sequences(seq_train, 1))
unique_seq_1_list = np.sort(np.unique(seq_1_list))

# get all sequences of length 2 in the training data
seq_1plus1_list        = np.array(get_n_letter_sequences(seq_train, 2))
unique_seq_1plus1_list = np.sort(np.unique(seq_1plus1_list))

# create an empty matrix that will eventually hold the transition probabilities
arr_tr_mat = np.zeros((2, unique_seq_1_list.shape[0]))

print ('seq-o' + str(1), '\tseq-o' + str(1+1), '\tcount', '\tprob_of_seq-o' + str(1+1))

for i, seq_n in enumerate(unique_seq_1_list):
    
    # possible next states appended to seq_n. There are only two possibilities
    seq_1plus1_0 = seq_n + 'L'
    seq_1plus1_1 = seq_n + 'W'
    
    # Count all occurrences of the above sequences in all the n+1 length sequences
    count_seq_1plus1_0 = (seq_1plus1_list == seq_1plus1_0).sum()
    count_seq_1plus1_1 = (seq_1plus1_list == seq_1plus1_1).sum()
    
    # get ratios of occurrences of each n+1 length sequence
    prob_seq_1plus1_0 = count_seq_1plus1_0/(count_seq_1plus1_0+count_seq_1plus1_1)
    prob_seq_1plus1_1 = count_seq_1plus1_1/(count_seq_1plus1_0+count_seq_1plus1_1)
    
    # to see the output
    print (seq_n, '\t', seq_1plus1_0, '\t', count_seq_1plus1_0, '\t', round(prob_seq_1plus1_0, 3))
    print (seq_n, '\t', seq_1plus1_1, '\t', count_seq_1plus1_1, '\t', round(prob_seq_1plus1_1, 3))
    
    arr_tr_mat[0][i] = prob_seq_1plus1_0
    arr_tr_mat[1][i] = prob_seq_1plus1_1
    
    
print ('\nTransition Matrix = ')
print (arr_tr_mat)

seq-o1 	seq-o2 	count 	prob_of_seq-o2
L 	 LL 	 8 	 0.421
L 	 LW 	 11 	 0.579
W 	 WL 	 10 	 0.5
W 	 WW 	 10 	 0.5

Transition Matrix = 
[[0.42105263 0.5       ]
 [0.57894737 0.5       ]]


### generate predictions and accuracy score for a single stretch

In [10]:
# generate predictions and accuracy score

# empty array to hold predictions
predictions = []

# last known seq-n state
seq_start = seq_train[-1:]
print ('starting state sequence:', seq_start)

# make N forward predictions. make it the same length as the test data of known outcomes
N = len(seq_test)

for i in range (0, N):
    
    # define the state vector such that there is a '1' corresponding to the value of the starting sequence
    # the '1' should be placed in the row number corresponding to the column number of the starting sequence in the transition matrix 
    # (or unique_seq_1plus1_list)
    state_start = np.where(unique_seq_1_list == seq_start, 1, 0)
    
    # calculate the probabilities of the next state
    state_next_probs = arr_tr_mat @ state_start
    
    # based on the probabilities, pick a state
    state_next = pick_state('L', state_next_probs[0], 'W', state_next_probs[1])
    # print ('predicted state next:', state_next, 'prob(L)=',round(state_next_probs[0], 2))
    
    # the next sequence will now include the forecasted state
    # add next state to the current start state, then take only the last n-state sequence to be the next start state
    seq_start = (seq_start + state_next)[-1:]
    
    # print ('next starting state sequence:', seq_start)
    predictions.append(state_next)
    
print ('\n>> forecast of ' + str(len(seq_test)) + ' states')
print (predictions)

# make a new dataframe holding the actual known outcomes (seq_test) and the predictions. 
df_validation = pd.DataFrame({'actual': seq_test, 'predicted': np.array(predictions)})

# get accuracy of individual predictions
acc = df_validation.loc[df_validation.actual==df_validation.predicted].shape[0]/df_validation.shape[0]

print ('accuracy rate: ', round(acc, 3))

starting state sequence: W
0.5 1.0 0.2961921418311545
0.42105263157894735 1.0 0.24340786317069585
0.42105263157894735 1.0 0.43165735917034553
0.5 1.0 0.06081814029761712
0.42105263157894735 1.0 0.8476353616530946
0.5 1.0 0.477160832460912
0.42105263157894735 1.0 0.7011002694783188
0.5 1.0 0.8361973859872915
0.5 1.0 0.14521389324815137
0.42105263157894735 1.0 0.9879824136184703
0.5 1.0 0.9852808341283527
0.5 1.0 0.5079393457327014
0.5 1.0 0.6315671022841509
0.5 1.0 0.06214759835361128
0.42105263157894735 1.0 0.7078561223932164
0.5 1.0 0.24819403530094586
0.42105263157894735 1.0 0.8206154759097367
0.5 1.0 0.7659030417438731
0.5 1.0 0.7812230428002002
0.5 1.0 0.6204311259007351
0.5 1.0 0.21348079992282953
0.42105263157894735 1.0 0.8601999890590494
0.5 1.0 0.2395262802295609
0.42105263157894735 1.0 0.3719874209024753
0.42105263157894735 1.0 0.7213562866133447
0.5 1.0 0.7531878258181838
0.5 1.0 0.6411902031418549
0.5 1.0 0.9210858335500394
0.5 1.0 0.33860215675966254
0.42105263157894735 1.0

### Your work below

In [11]:
# Q4

def get_transition_matrix_order_n(seq_train, order):
    # get all sequences of length n (order) in the training data
    seq_n_list = np.array(get_n_letter_sequences(seq_train, order))
    unique_seq_n_list = np.sort(np.unique(seq_n_list))
    
    # get all sequences of length (n+1) in the training data
    seq_nplus1_list = np.array(get_n_letter_sequences(seq_train, order + 1))
    unique_seq_nplus1_list = np.sort(np.unique(seq_nplus1_list))
    
    # create an empty matrix that will eventually hold the transition probabilities
    arr_tr_mat = np.zeros((2, len(unique_seq_n_list)))
    
    print('seq-o' + str(order), '\tseq-o' + str(order + 1), '\tcount', '\tprob_of_seq-o' + str(order + 1))

    # Loop through each unique sequence of length n
    for i, seq_n in enumerate(unique_seq_n_list):
        # possible next states appended to seq_n. There are only two possibilities
        seq_nplus1_0 = seq_n + 'L'
        seq_nplus1_1 = seq_n + 'W'
        
        # count all occurrences of the above sequences in all the n+1 length sequences
        count_seq_nplus1_0 = (seq_nplus1_list == seq_nplus1_0).sum()
        count_seq_nplus1_1 = (seq_nplus1_list == seq_nplus1_1).sum()
        total_count = count_seq_nplus1_0 + count_seq_nplus1_1
        
        # get ratios of occurrences of each n+1 length sequence
        prob_seq_nplus1_0 = count_seq_nplus1_0 / (count_seq_nplus1_0 + count_seq_nplus1_1) if total_count != 0 else 0
        prob_seq_nplus1_1 = count_seq_nplus1_1 / (count_seq_nplus1_0 + count_seq_nplus1_1) if total_count != 0 else 0
        
        # print the output for each sequence
        print(seq_n, '\t', seq_nplus1_0, '\t', count_seq_nplus1_0, '\t', round(prob_seq_nplus1_0, 3))
        print(seq_n, '\t', seq_nplus1_1, '\t', count_seq_nplus1_1, '\t', round(prob_seq_nplus1_1, 3))
        
        # assign transition probabilities to the transition matrix
        arr_tr_mat[0][i] = prob_seq_nplus1_0
        arr_tr_mat[1][i] = prob_seq_nplus1_1
    
    print('\nTransition Matrix = ')
    print(arr_tr_mat)
    
    return arr_tr_mat

In [12]:
get_transition_matrix_order_n(seq_train, 1)

seq-o1 	seq-o2 	count 	prob_of_seq-o2
L 	 LL 	 8 	 0.421
L 	 LW 	 11 	 0.579
W 	 WL 	 10 	 0.5
W 	 WW 	 10 	 0.5

Transition Matrix = 
[[0.42105263 0.5       ]
 [0.57894737 0.5       ]]


array([[0.42105263, 0.5       ],
       [0.57894737, 0.5       ]])

In [13]:
get_transition_matrix_order_n(seq_train, 2)

seq-o2 	seq-o3 	count 	prob_of_seq-o3
LL 	 LLL 	 4 	 0.5
LL 	 LLW 	 4 	 0.5
LW 	 LWL 	 5 	 0.455
LW 	 LWW 	 6 	 0.545
WL 	 WLL 	 3 	 0.3
WL 	 WLW 	 7 	 0.7
WW 	 WWL 	 5 	 0.556
WW 	 WWW 	 4 	 0.444

Transition Matrix = 
[[0.5        0.45454545 0.3        0.55555556]
 [0.5        0.54545455 0.7        0.44444444]]


array([[0.5       , 0.45454545, 0.3       , 0.55555556],
       [0.5       , 0.54545455, 0.7       , 0.44444444]])

In [14]:
get_transition_matrix_order_n(seq_train, 3)

seq-o3 	seq-o4 	count 	prob_of_seq-o4
LLL 	 LLLL 	 2 	 0.5
LLL 	 LLLW 	 2 	 0.5
LLW 	 LLWL 	 2 	 0.5
LLW 	 LLWW 	 2 	 0.5
LWL 	 LWLL 	 1 	 0.2
LWL 	 LWLW 	 4 	 0.8
LWW 	 LWWL 	 3 	 0.5
LWW 	 LWWW 	 3 	 0.5
WLL 	 WLLL 	 2 	 0.667
WLL 	 WLLW 	 1 	 0.333
WLW 	 WLWL 	 3 	 0.429
WLW 	 WLWW 	 4 	 0.571
WWL 	 WWLL 	 2 	 0.4
WWL 	 WWLW 	 3 	 0.6
WWW 	 WWWL 	 2 	 0.667
WWW 	 WWWW 	 1 	 0.333

Transition Matrix = 
[[0.5        0.5        0.2        0.5        0.66666667 0.42857143
  0.4        0.66666667]
 [0.5        0.5        0.8        0.5        0.33333333 0.57142857
  0.6        0.33333333]]


array([[0.5       , 0.5       , 0.2       , 0.5       , 0.66666667,
        0.42857143, 0.4       , 0.66666667],
       [0.5       , 0.5       , 0.8       , 0.5       , 0.33333333,
        0.57142857, 0.6       , 0.33333333]])

In [15]:
# Q5

def get_prediction_accuracy(seq_train, seq_test, order):
    
    # call the nested function to get the transition matrix
    arr_tr_mat = get_transition_matrix_order_n(seq_train, order)
    
    # fetch necessary variables from the transition matrix
    seq_n_list = np.array(get_n_letter_sequences(seq_train, order))
    unique_seq_n_list = np.sort(np.unique(seq_n_list))
    
    # empty array to hold predictions
    predictions = []

    # last known seq-n state
    seq_start = seq_train[-order:]

    # make N forward predictions. make it the same length as the test data of known outcomes
    N = len(seq_test)

    for i in range(N):
        # define the state vector such that there is a '1' corresponding to the value of the starting sequence
        # the '1' should be placed in the row number corresponding to the column number of the starting sequence in the transition matrix 
        # (or unique_seq_1plus1_list)
        state_start = np.where(unique_seq_n_list == seq_start, 1, 0)

        # calculate the probabilities of the next state
        state_next_probs = arr_tr_mat @ state_start
        
        # based on the probabilities, pick a state
        state_next = pick_state('L', state_next_probs[0], 'W', state_next_probs[1])
        
#         print ('predicted state next:', state_next, 'prob(L)=',round(state_next_probs[0], 2))

        # the next sequence will now include the forecasted state
        # add next state to the current start state, then take only the last n-state sequence to be the next start state
        seq_start = (seq_start + state_next)[-order:]

        predictions.append(state_next)
        
    print ('\n>> forecast of ' + str(len(seq_test)) + ' states')
    print (predictions)

    # make a new dataframe holding the actual known outcomes (seq_test) and the predictions. 
    df_validation = pd.DataFrame({'actual': seq_test, 'predicted': np.array(predictions)})

    # get accuracy of individual predictions
    acc = df_validation.loc[df_validation.actual == df_validation.predicted].shape[0] / df_validation.shape[0]

    return acc
    print('Accuracy rate:', round(acc, 3))

In [16]:
get_prediction_accuracy(seq_train, seq_test, 1)

seq-o1 	seq-o2 	count 	prob_of_seq-o2
L 	 LL 	 8 	 0.421
L 	 LW 	 11 	 0.579
W 	 WL 	 10 	 0.5
W 	 WW 	 10 	 0.5

Transition Matrix = 
[[0.42105263 0.5       ]
 [0.57894737 0.5       ]]
0.5 1.0 0.28999199174259804
0.42105263157894735 1.0 0.19213250069206678
0.42105263157894735 1.0 0.12758307152005832
0.42105263157894735 1.0 0.03328188403462473
0.42105263157894735 1.0 0.4052325440393423
0.42105263157894735 1.0 0.7977340243635631
0.5 1.0 0.6185702814148093
0.5 1.0 0.1248339074532806
0.42105263157894735 1.0 0.9292206707788695
0.5 1.0 0.20844927349843312
0.42105263157894735 1.0 0.17169106623759578
0.42105263157894735 1.0 0.5489136111146745
0.5 1.0 0.13442194219012282
0.42105263157894735 1.0 0.14314076826203215
0.42105263157894735 1.0 0.06046900768563024
0.42105263157894735 1.0 0.0291016705758671
0.42105263157894735 1.0 0.1969123632442673
0.42105263157894735 1.0 0.9241548396729155
0.5 1.0 0.49199460054512245
0.42105263157894735 1.0 0.49733260707333626
0.5 1.0 0.9313411176311154
0.5 1.0 0.49

0.5714285714285714

In [17]:
get_prediction_accuracy(seq_train, seq_test, 2)

seq-o2 	seq-o3 	count 	prob_of_seq-o3
LL 	 LLL 	 4 	 0.5
LL 	 LLW 	 4 	 0.5
LW 	 LWL 	 5 	 0.455
LW 	 LWW 	 6 	 0.545
WL 	 WLL 	 3 	 0.3
WL 	 WLW 	 7 	 0.7
WW 	 WWL 	 5 	 0.556
WW 	 WWW 	 4 	 0.444

Transition Matrix = 
[[0.5        0.45454545 0.3        0.55555556]
 [0.5        0.54545455 0.7        0.44444444]]
0.5555555555555556 1.0 0.4923327784804844
0.3 1.0 0.2691122698898235
0.5 1.0 0.18856161337083654
0.5 1.0 0.48048382391172584
0.5 1.0 0.7877165027425387
0.45454545454545453 1.0 0.7573337529278523
0.5555555555555556 1.0 0.5521383428604549
0.3 1.0 0.2377614782697074
0.5 1.0 0.7174946962052332
0.45454545454545453 1.0 0.3875456617840033
0.3 1.0 0.698078614245388
0.45454545454545453 1.0 0.5024349252735761
0.5555555555555556 1.0 0.9988559832732982
0.5555555555555556 1.0 0.9590034993336306
0.5555555555555556 1.0 0.2055214560840446
0.3 1.0 0.3800267301528588
0.45454545454545453 1.0 0.7578930654054613
0.5555555555555556 1.0 0.4518952151665161
0.3 1.0 0.7120827112125181
0.454545454545454

0.5238095238095238

In [18]:
get_prediction_accuracy(seq_train, seq_test, 3)

seq-o3 	seq-o4 	count 	prob_of_seq-o4
LLL 	 LLLL 	 2 	 0.5
LLL 	 LLLW 	 2 	 0.5
LLW 	 LLWL 	 2 	 0.5
LLW 	 LLWW 	 2 	 0.5
LWL 	 LWLL 	 1 	 0.2
LWL 	 LWLW 	 4 	 0.8
LWW 	 LWWL 	 3 	 0.5
LWW 	 LWWW 	 3 	 0.5
WLL 	 WLLL 	 2 	 0.667
WLL 	 WLLW 	 1 	 0.333
WLW 	 WLWL 	 3 	 0.429
WLW 	 WLWW 	 4 	 0.571
WWL 	 WWLL 	 2 	 0.4
WWL 	 WWLW 	 3 	 0.6
WWW 	 WWWL 	 2 	 0.667
WWW 	 WWWW 	 1 	 0.333

Transition Matrix = 
[[0.5        0.5        0.2        0.5        0.66666667 0.42857143
  0.4        0.66666667]
 [0.5        0.5        0.8        0.5        0.33333333 0.57142857
  0.6        0.33333333]]
0.6666666666666666 1.0 0.942246210355752
0.6666666666666666 1.0 0.7171855724376861
0.6666666666666666 1.0 0.46545459513915055
0.4 1.0 0.3276984101405066
0.6666666666666666 1.0 0.23316194603940388
0.5 1.0 0.6877053717289318
0.5 1.0 0.5745156089552124
0.5 1.0 0.47146063336644506
0.4 1.0 0.6271179221086763
0.42857142857142855 1.0 0.23976438001524036
0.2 1.0 0.15931943940746796
0.6666666666666666 1.0 0.274

0.42857142857142855

In [19]:
# Q6

num_simulations = 10

# initialize a list to store the results
results = []

# Loop through each order
for order in range(1, 5):
    # initialize a list to store accuracies for this order
    accuracies_for_order = []
    
    # perform multiple simulations
    for _ in range(num_simulations):
        # get the accuracy for this simulation and order
        accuracy = get_prediction_accuracy(seq_train, seq_test, order)
        
        # append the accuracy to the list of accuracies for this order
        accuracies_for_order.append(accuracy)
    
    # store the order number and accuracies in the results list
    results.append({'order': order, 'accuracies': accuracies_for_order})

# convert the results list to a pandas DataFrame for easier manipulation and visualization
results_df = pd.DataFrame(results)

# I've checked and tried to debug but still have an error of 'Unexpected error: random number outside [0, 1]. I have no idea on the reason.


seq-o1 	seq-o2 	count 	prob_of_seq-o2
L 	 LL 	 8 	 0.421
L 	 LW 	 11 	 0.579
W 	 WL 	 10 	 0.5
W 	 WW 	 10 	 0.5

Transition Matrix = 
[[0.42105263 0.5       ]
 [0.57894737 0.5       ]]
0.5 1.0 0.585638917142396
0.5 1.0 0.8698970244609899
0.5 1.0 0.019209164121104072
0.42105263157894735 1.0 0.5780577403506603
0.5 1.0 0.7422167901768146
0.5 1.0 0.22156867323017926
0.42105263157894735 1.0 0.308590166596615
0.42105263157894735 1.0 0.1593413636646086
0.42105263157894735 1.0 0.4789098974928837
0.5 1.0 0.7084787684646894
0.5 1.0 0.5435730132189606
0.5 1.0 0.14364834207187394
0.42105263157894735 1.0 0.1081052948043365
0.42105263157894735 1.0 0.4771451627554041
0.5 1.0 0.9433258807360251
0.5 1.0 0.059763267232927575
0.42105263157894735 1.0 0.442993703961279
0.5 1.0 0.8616143403862037
0.5 1.0 0.9532847487555451
0.5 1.0 0.43190117808732653
0.42105263157894735 1.0 0.9684394822331539
0.5 1.0 0.33631531570656914
0.42105263157894735 1.0 0.600509233072566
0.5 1.0 0.24629168700816484
0.421052631578947

seq-o3 	seq-o4 	count 	prob_of_seq-o4
LLL 	 LLLL 	 2 	 0.5
LLL 	 LLLW 	 2 	 0.5
LLW 	 LLWL 	 2 	 0.5
LLW 	 LLWW 	 2 	 0.5
LWL 	 LWLL 	 1 	 0.2
LWL 	 LWLW 	 4 	 0.8
LWW 	 LWWL 	 3 	 0.5
LWW 	 LWWW 	 3 	 0.5
WLL 	 WLLL 	 2 	 0.667
WLL 	 WLLW 	 1 	 0.333
WLW 	 WLWL 	 3 	 0.429
WLW 	 WLWW 	 4 	 0.571
WWL 	 WWLL 	 2 	 0.4
WWL 	 WWLW 	 3 	 0.6
WWW 	 WWWL 	 2 	 0.667
WWW 	 WWWW 	 1 	 0.333

Transition Matrix = 
[[0.5        0.5        0.2        0.5        0.66666667 0.42857143
  0.4        0.66666667]
 [0.5        0.5        0.8        0.5        0.33333333 0.57142857
  0.6        0.33333333]]
0.6666666666666666 1.0 0.24219605217082474
0.4 1.0 0.9091098645028842
0.42857142857142855 1.0 0.28349483150088406
0.2 1.0 0.3193166919526449
0.42857142857142855 1.0 0.1569213114489003
0.2 1.0 0.2828458308788744
0.42857142857142855 1.0 0.3203437440327297
0.2 1.0 0.6051718714394909
0.42857142857142855 1.0 0.05739113666372653
0.2 1.0 0.13576738180140258
0.6666666666666666 1.0 0.7824248497074135
0.5 1.0 0.

Exception: Unexpected error: random number outside [0, 1]

In [None]:
# Q7

# Create a box plot
plt.figure(figsize=(8, 6))
plt.boxplot(results_df['accuracies'], labels=results_df['order'])
plt.xlabel('Order')
plt.ylabel('Accuracy')
plt.title('Box Plot of Accuracy vs Order')
plt.show()


# Q8

In my opinion, there are some choices of training data that could potentially yield better results:

- Using longer sequences in the training data may capture more complex patterns and dependencies, leading to more accurate predictions. However, longer sequences also increase the complexity of the model and require more computational resources for training. 


- Implementing cross-validation techniques to evaluate the model's performance on different subsets of the data can identify potential areas for improvement.
