# Analysis of the LLM Runs

In [81]:
import json
import pandas as pd
import numpy as np

#### Llama inference task

In [5]:
# Read the first run's results file and decode its JSON payload into `base`.
with open('Data/base1.json', 'rb') as file:
    base = json.loads(file.read())

In [105]:
# Spot-check one record: show the 'confidence score' field of the entry at index 21.
base[21]['confidence score']

80

In [86]:
# Take the first record's 'response' value (presumably the raw model text) and
# look for the upper-case keyword 'HOLD'; find() gives the offset of the first
# match, or -1 when the keyword does not occur.
response = base[0]['response']
response.find('HOLD')

1321

In [106]:
def _decision_from_response(response):
    """Return the one BUY/SELL/HOLD keyword that occurs in ``response``.

    The match is a case-sensitive substring search. When ``response`` is not a
    string, contains none of the keywords, or contains more than one of them,
    the result is the placeholder ``'Missing'``.
    """
    if not isinstance(response, str):
        return 'Missing'
    found = [label for label in ('BUY', 'SELL', 'HOLD') if label in response]
    return found[0] if len(found) == 1 else 'Missing'


def unpack_to_dataframe(data):
    """Flatten a list of result records into equal-length column lists.

    Parameters
    ----------
    data : iterable
        Records indexed by the keys 'date' and 'security', and optionally
        'decision', 'confidence score' and 'response'.

    Returns
    -------
    dict
        ``{'Date': [...], 'Security': [...], 'Decision': [...],
        'Confidence': [...]}`` with one entry per kept record, suitable for
        ``pd.DataFrame(data=...)``.

    Notes
    -----
    * A record lacking 'date' or 'security' is skipped entirely and
      'Missing date' is printed; nothing is appended for it, so the four
      columns always stay the same length.
    * When 'decision' is present it is used as-is; the confidence is the
      record's 'confidence score', or NaN when that key is absent.
    * Otherwise the decision is recovered from the 'response' text via
      ``_decision_from_response`` and the confidence is NaN.
    """
    date = []
    sec = []
    decision = []
    confidence = []

    for result in data:
        # Read both identifying fields before appending anything, so a record
        # that has one but not the other cannot leave the columns misaligned.
        try:
            record_date = result['date']
            record_sec = result['security']
        except (KeyError, TypeError, IndexError):
            print('Missing date')
            continue

        if 'decision' in result:
            record_decision = result['decision']
            record_confidence = result.get('confidence score', np.nan)
        else:
            record_decision = _decision_from_response(result.get('response'))
            record_confidence = np.nan

        # Exactly one append per column per record.
        date.append(record_date)
        sec.append(record_sec)
        decision.append(record_decision)
        confidence.append(record_confidence)

    return {'Date': date, 'Security': sec, 'Decision': decision, 'Confidence': confidence}
        
            

In [107]:
# Flatten the records loaded into `base` into a dict of column lists.
s = unpack_to_dataframe(base)

Missing date


In [108]:
# Spot-check: the confidence value collected at position 2 of the unpacked columns.
s['Confidence'][2]

80

In [109]:
# Turn the dict of column lists into a DataFrame and display it.
df = pd.DataFrame(data=s)
df

Unnamed: 0,Date,Security,Decision,Confidence
0,2020-02-06,MMM UN Equity,HOLD,
1,2020-02-12,CSCO UW Equity,BUY,80.0
2,2020-02-12,AMGN UQ Equity,BUY,80.0
3,2020-02-13,AXP UN Equity,SELL,
4,2020-02-13,NVDA UQ Equity,BUY,70.0
...,...,...,...,...
873,2024-11-19,HD UN Equity,BUY,70.0
874,2024-11-19,WMT UN Equity,BUY,70.0
875,2024-11-20,NVDA UQ Equity,BUY,85.0
876,2024-12-03,CRM UN Equity,BUY,85.0


In [110]:

def explore_data(df):
    """Print how many rows of ``df`` carry each decision label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a 'Decision' column holding the labels 'Missing', 'BUY',
        'SELL' and/or 'HOLD'.

    Returns
    -------
    None
        The four counts are written to stdout, one per line. A label that does
        not occur in the column is reported as 0 instead of raising KeyError.
    """
    # Count once and reuse; .get() supplies 0 for labels with no rows.
    counts = df['Decision'].value_counts()
    captions = (
        ("Number Missing: ", 'Missing'),
        ("Number of Buy: ", 'BUY'),
        ("Number of Sell: ", 'SELL'),
        ("Number of Hold: ", 'HOLD'),
    )
    for caption, label in captions:
        print(caption, counts.get(label, 0))

In [111]:
# Print the per-label decision counts for `df`.
explore_data(df)

Number Missing:  35
Number of Buy:  679
Number of Sell:  107
Number of Hold:  56


#### DeepSeek 14B inference task

In [72]:
# load the data
# Read the second results file and decode its JSON payload into `ds`.
with open('Data/base_deepseek_r2.json', 'rb') as file:
    ds = json.loads(file.read())

In [115]:
# Inspect the raw record at index 3 of `ds`.
ds[3]

{'security': 'CVX UN Equity',
 'date': '2020-05-06',
 'response': 'Okay, so I need to figure out whether to buy, sell, or hold this company based on the financial data provided. Let me start by looking at the income statement first. The revenue has been fluctuating a bit. It was 2.9705e+10 last year, but in previous years, it was higher, like 4.0338e+10 two years ago. So, revenue isn\'t showing a clear upward trend.\n\nLooking at the gross profit, it\'s been inconsistent too. Last year it was 3.45e+09, but two years ago it was negative 8.504e+09. Wait, that\'s a big swing. Maybe there was some one-time expense or loss then. Gross profit has been positive in the more recent years, which is good, but it\'s not showing consistent growth. It went from 4.62e+09 to 4.445e+09 to 4.92e+09. Hmm, not a clear trend.\n\nOperating income is also fluctuating. Last year it was 2.609e+09, but the year before that, it was negative 1.0003e+10. That\'s a huge drop. So, the company had a significant loss 

In [112]:
# Flatten the `ds` records into column lists, build a DataFrame from them and
# display it. NOTE: this rebinds `s`, replacing any value it held before.
s = unpack_to_dataframe(ds)
df1 = pd.DataFrame(data=s)
df1

Unnamed: 0,Date,Security,Decision,Confidence
0,2020-05-05,DD UN Equity,SELL,75.0
1,2020-05-06,XOM UN Equity,SELL,75.0
2,2020-05-06,UNH UN Equity,SELL,75.0
3,2020-05-06,CVX UN Equity,HOLD,
4,2020-05-07,PFE UN Equity,SELL,85.0
...,...,...,...,...
891,2025-01-31,XOM UN Equity,SELL,85.0
892,2025-02-04,PFE UN Equity,HOLD,75.0
893,2025-02-04,MRK UN Equity,Missing,
894,2025-02-04,AMGN UW Equity,Missing,


In [113]:
# Print the per-label decision counts for `df1`.
explore_data(df1)

Number Missing:  112
Number of Buy:  87
Number of Sell:  267
Number of Hold:  430
