In [None]:
# import sys
# !{sys.executable} -m pip install ollama

## Set up your project

In [None]:
# Create a folder called ~/Desktop/Deepseek_Python and open it in VSCode
# Run this in your Bash Terminal window in VSCode
# ===============================================
# ls -la  # Check Folder structure
# cd ~/Desktop/Deepseek_Python  # Navigate to the Correct Directory:
# python -m venv venv # Create a Virtual Environment
# source venv/Scripts/activate # Activate your virtual environment (Git Bash on Windows; on macOS/Linux use: source venv/bin/activate)
# pip list # List all installed packages in venv
# OR, to list the packages with their exact pinned versions:
# pip freeze
# If needed: pip install ollama

## Import Libraries

In [1]:
import html
import re
from datetime import datetime

import ipywidgets as widgets
import pytz
from IPython.display import display, HTML
from ollama import chat
from ollama import ChatResponse

from credentials import SEC_API_KEY

## Get Financial Statements from SEC-Edgar Website

#### https://gist.github.com/janlukasschroeder/3da274150fd00c1c1776c7e541a9b61b#file-extracting-financial-statements-from-sec-filings-ipynb

In [None]:
# install the SEC API Python package
# import sys
# !{sys.executable} -m pip install sec-api

In [2]:
# get your free API key at https://sec-api.io
# SEC_API_KEY comes from the local `credentials` module imported above,
# so the key itself is never written into the notebook.
api_key = SEC_API_KEY

In [3]:
import requests
import json
import pandas as pd

# 10-K filing URL of AbbVie
filing_url = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1551152/000155115225000020/abbv-20241231.htm"

# XBRL-to-JSON converter API endpoint
xbrl_converter_api_endpoint = "https://api.sec-api.io/xbrl-to-json"

# Let requests build the query string: the filing URL contains "?" and "="
# itself, so it must be percent-encoded rather than concatenated in raw form.
request_params = {"htm-url": filing_url, "token": api_key}

# make request to the API; the timeout keeps the cell from hanging indefinitely
response = requests.get(xbrl_converter_api_endpoint, params=request_params, timeout=60)

# Fail here on an HTTP error (invalid key, rate limit, ...) instead of parsing
# an error body and hitting a confusing KeyError in a later cell.
response.raise_for_status()

# Full request URL, kept under its original name. It embeds the API token,
# so do not print or display it.
final_url = response.url

# load JSON into memory
xbrl_json = response.json()

In [5]:
# income statement example: dump the first two facts reported for
# CostOfGoodsAndServicesSold to show what a single XBRL-JSON fact looks like
print(json.dumps(xbrl_json['StatementsOfIncome']['CostOfGoodsAndServicesSold'][0:2], indent=7))

[
       {
              "decimals": "-6",
              "unitRef": "usd",
              "period": {
                     "startDate": "2024-01-01",
                     "endDate": "2024-12-31"
              },
              "value": "16904000000"
       },
       {
              "decimals": "-6",
              "unitRef": "usd",
              "period": {
                     "startDate": "2023-01-01",
                     "endDate": "2023-12-31"
              },
              "value": "20415000000"
       }
]


In [6]:
# convert XBRL-JSON of income statement to pandas dataframe
def get_income_statement(xbrl_json):
    """Convert the ``StatementsOfIncome`` section of an XBRL-JSON filing to a DataFrame.

    Args:
        xbrl_json: Mapping with a ``'StatementsOfIncome'`` key that maps each
            US GAAP item name to a list of facts. Each fact has a ``'period'``
            with ``'startDate'`` and ``'endDate'`` and usually a ``'value'``.

    Returns:
        pandas.DataFrame with one row per US GAAP item and one column per
        ``"<startDate>-<endDate>"`` range. Values are stored as they appear
        in the JSON; no numeric conversion is done here.
    """
    income_statement_store = {}

    # iterate over each US GAAP item in the income statement
    for usGaapItem, facts in xbrl_json['StatementsOfIncome'].items():
        values = []
        indicies = []

        for fact in facts:
            # only consider items without segment. not required for our analysis.
            if 'segment' in fact:
                continue

            index = fact['period']['startDate'] + '-' + fact['period']['endDate']
            # ensure no index duplicates are created: the first fact of a period wins
            if index in indicies:
                continue

            # A nil fact has no "value" key; record 0, as get_balance_sheet and
            # get_cash_flow_statement do, instead of raising KeyError.
            values.append(fact.get('value', 0))
            indicies.append(index)

        income_statement_store[usGaapItem] = pd.Series(values, index=indicies)

    income_statement = pd.DataFrame(income_statement_store)
    # switch columns and rows so that US GAAP items are rows and each column header represents a date range
    return income_statement.T

income_statement = get_income_statement(xbrl_json)

In [8]:
income_statement.iloc[:, [0, 1, 2]]

Unnamed: 0,2022-01-01-2022-12-31,2023-01-01-2023-12-31,2024-01-01-2024-12-31
Revenues,58054000000.0,54318000000.0,56334000000.0
CostOfGoodsAndServicesSold,17414000000.0,20415000000.0,16904000000.0
SellingGeneralAndAdministrativeExpense,15260000000.0,12872000000.0,14752000000.0
ResearchAndDevelopmentExpenseExcludingAcquiredInProcessCost,6510000000.0,7675000000.0,12791000000.0
ResearchAndDevelopmentAssetAcquiredOtherThanThroughBusinessCombinationWrittenOff,697000000.0,778000000.0,2757000000.0
OtherOperatingIncomeExpenseNet,-56000000.0,179000000.0,7000000.0
CostsAndExpenses,39937000000.0,41561000000.0,47197000000.0
OperatingIncomeLoss,18117000000.0,12757000000.0,9137000000.0
InterestIncomeExpenseNonoperatingNet,-2044000000.0,-1684000000.0,-2160000000.0
ForeignCurrencyTransactionGainLossBeforeTax,-148000000.0,-146000000.0,-21000000.0


In [9]:
# convert XBRL-JSON of balance sheet to pandas dataframe
def get_balance_sheet(xbrl_json):
    """Convert the ``BalanceSheets`` section of an XBRL-JSON filing to a DataFrame.

    Args:
        xbrl_json: Mapping with a ``'BalanceSheets'`` key that maps each
            US GAAP item name to a list of facts. Each fact has a ``'period'``
            with an ``'instant'`` date and usually a ``'value'``.

    Returns:
        pandas.DataFrame with one row per US GAAP item and one column per
        date instant. Values are stored as they appear in the JSON; a fact
        without a ``'value'`` key is recorded as 0.
    """
    balance_sheet_store = {}

    for usGaapItem, facts in xbrl_json['BalanceSheets'].items():
        values = []
        indicies = []

        for fact in facts:
            # only consider items without segment.
            if 'segment' in fact:
                continue

            index = fact['period']['instant']

            # avoid duplicate indicies: the first fact of an instant wins
            if index in indicies:
                continue

            # add 0 if value is nil
            values.append(fact.get('value', 0))
            indicies.append(index)

        # Build the Series once per item, after all of its facts are collected.
        # Doing this inside the fact loop rebuilt it for every fact and dropped
        # items whose fact list is empty.
        balance_sheet_store[usGaapItem] = pd.Series(values, index=indicies)

    balance_sheet = pd.DataFrame(balance_sheet_store)
    # switch columns and rows so that US GAAP items are rows and each column header represents a date instant
    return balance_sheet.T

balance_sheet = get_balance_sheet(xbrl_json)

In [12]:
balance_sheet.iloc[:, -2:]

Unnamed: 0,2023-12-31,2024-12-31
CashAndCashEquivalentsAtCarryingValue,12814000000.0,5524000000.0
ShortTermInvestments,2000000.0,31000000.0
AccountsReceivableNetCurrent,11155000000.0,10919000000.0
InventoryNet,4099000000.0,4181000000.0
PrepaidExpenseAndOtherAssets,4932000000.0,4927000000.0
AssetsCurrent,33002000000.0,25582000000.0
LongTermInvestments,304000000.0,279000000.0
PropertyPlantAndEquipmentNet,4989000000.0,5134000000.0
IntangibleAssetsNetExcludingGoodwill,55610000000.0,60068000000.0
Goodwill,32293000000.0,34956000000.0


In [13]:
def get_cash_flow_statement(xbrl_json):
    """Convert the ``StatementsOfCashFlows`` section of an XBRL-JSON filing to a DataFrame.

    Facts that carry a ``'segment'`` are skipped. A fact is labelled by its
    ``'instant'`` date when the period has one, otherwise by
    ``"<startDate>-<endDate>"``. Only the first fact per label is kept, and a
    fact without a ``'value'`` key is recorded as 0.

    Returns:
        pandas.DataFrame with one row per US GAAP item and one column per
        period label.
    """
    statement = xbrl_json['StatementsOfCashFlows']
    series_by_item = {}

    for item_name in statement:
        period_labels = []
        amounts = []

        for entry in statement[item_name]:
            # segmented facts are not part of the consolidated statement view
            if 'segment' in entry:
                continue

            period = entry['period']
            # a period is either a single date instant or a start/end date range
            label = period['instant'] if "instant" in period else period['startDate'] + '-' + period['endDate']

            # keep only the first fact seen for each period label
            if label in period_labels:
                continue

            amounts.append(entry['value'] if "value" in entry else 0)
            period_labels.append(label)

        series_by_item[item_name] = pd.Series(amounts, index=period_labels)

    # transpose so that items are rows and period labels are columns
    return pd.DataFrame(series_by_item).T
    
cash_flows = get_cash_flow_statement(xbrl_json)
     

In [16]:
# Positions 1, 3 and 5 are the three fiscal-year date-range columns for this
# filing (see the column headers in the output below). The selection is
# positional, so a filing with a different column layout needs other indices.
cash_flows.iloc[:, [1, 3, 5]]

Unnamed: 0,2022-01-01-2022-12-31,2023-01-01-2023-12-31,2024-01-01-2024-12-31
ProfitLoss,11845000000.0,4873000000.0,4286000000.0
Depreciation,778000000.0,752000000.0,764000000.0
AmortizationOfIntangibleAssets,7689000000.0,7946000000.0,7622000000.0
DeferredIncomeTaxExpenseBenefit,-1931000000.0,-2889000000.0,-1449000000.0
BusinessCombinationContingentConsiderationArrangementsChangeInAmountOfContingentConsiderationLiability1,2761000000.0,5128000000.0,3771000000.0
PaymentForContingentConsiderationLiabilityOperatingActivities,164000000.0,870000000.0,1995000000.0
ShareBasedCompensation,671000000.0,747000000.0,911000000.0
UpfrontCostsRelatedToCollaborations,697000000.0,778000000.0,2757000000.0
GainLossOnSaleOfOtherAssets,172000000.0,0.0,0.0
NonCashLitigationReserveAdjustmentsNetOfCashPayments,2243000000.0,-443000000.0,508000000.0


In [17]:
# Render the first three income statement columns as plain text.
# df_isstring is appended to the prompt sent to the model in on_button_click.
df_isstring = income_statement.iloc[:, [0, 1, 2]].to_string()
print(df_isstring)

                                                                                            2022-01-01-2022-12-31 2023-01-01-2023-12-31 2024-01-01-2024-12-31
Revenues                                                                                              58054000000           54318000000           56334000000
CostOfGoodsAndServicesSold                                                                            17414000000           20415000000           16904000000
SellingGeneralAndAdministrativeExpense                                                                15260000000           12872000000           14752000000
ResearchAndDevelopmentExpenseExcludingAcquiredInProcessCost                                            6510000000            7675000000           12791000000
ResearchAndDevelopmentAssetAcquiredOtherThanThroughBusinessCombinationWrittenOff                        697000000             778000000            2757000000
OtherOperatingIncomeExpenseNet                      

In [18]:
# Render the last two balance sheet columns as plain text,
# in the same form as the income statement string, for use in a model prompt.
df_bsstring = balance_sheet.iloc[:, -2:].to_string()
print(df_bsstring)

                                                                          2023-12-31    2024-12-31
CashAndCashEquivalentsAtCarryingValue                                    12814000000    5524000000
ShortTermInvestments                                                         2000000      31000000
AccountsReceivableNetCurrent                                             11155000000   10919000000
InventoryNet                                                              4099000000    4181000000
PrepaidExpenseAndOtherAssets                                              4932000000    4927000000
AssetsCurrent                                                            33002000000   25582000000
LongTermInvestments                                                        304000000     279000000
PropertyPlantAndEquipmentNet                                              4989000000    5134000000
IntangibleAssetsNetExcludingGoodwill                                     55610000000   60068000000
Goodwill  

In [19]:
# Render the cash flow columns at positions 1, 3 and 5 as plain text,
# in the same form as the income statement string, for use in a model prompt.
df_cfstring = cash_flows.iloc[:, [1, 3, 5]].to_string()
print(df_cfstring)

                                                                                                               2022-01-01-2022-12-31 2023-01-01-2023-12-31 2024-01-01-2024-12-31
ProfitLoss                                                                                                               11845000000            4873000000            4286000000
Depreciation                                                                                                               778000000             752000000             764000000
AmortizationOfIntangibleAssets                                                                                            7689000000            7946000000            7622000000
DeferredIncomeTaxExpenseBenefit                                                                                          -1931000000           -2889000000           -1449000000
BusinessCombinationContingentConsiderationArrangementsChangeInAmountOfContingentConsiderationLiability1            

# Analyze Financial Data with Deepseek-r1

## Analyze Income Statement using Deepseek-r1 1.5b and 7b models

In [22]:
def runSmallmodel(input_text, model_name="deepseek-r1:1.5b"):
    """Send one user prompt to an Ollama chat model and return the reply text.

    Args:
        input_text: Prompt sent as the only ``user`` message.
        model_name: Name of the model to query. It used to be overwritten by
            a hardcoded value inside the function; it is now honoured, with
            the former hardcoded model as the default.

    Returns:
        The reply content with a line break inserted after every ". ".
        The same text is also printed.
    """
    # Generate a result from the model
    results: ChatResponse = chat(
        model=model_name,
        messages=[{"role": "user", "content": input_text}],
    )

    # Format the result with newline characters (one sentence per line)
    formatted_result = results.message.content.replace('. ', '.\n')

    # Print the formatted result
    print(formatted_result)
    return formatted_result

## Style model output using HTML and CSS

In [None]:
# Define the input text
input_text = "What is a Unix(Epoc) timestamp?"
model_name = "deepseek-r1:1.5b"
results = runSmallmodel(input_text, model_name)

# Prepare the HTML body outside the f-string: a backslash inside an f-string
# expression is a SyntaxError before Python 3.12.
# Escape first so markup in the model reply (for example <think> tags) is
# shown as text instead of being interpreted by the browser.
results_html = html.escape(results).replace('\n', '<br>')

html_content = f"""
<div style="color: orange; font-size: 20px; font-family: 'Comic Sans MS', cursive, sans-serif; font-style: italic;">
    {results_html}
</div>
"""
# Display the styled content
display(HTML(html_content))

<think>
Okay, so I'm trying to understand what a Unix Epoch Timestamp (Unix(Epoc)) timestamp is.
From the initial explanation, it seems like it's some sort of way to represent dates and times in a machine-readable format.
The user mentioned that each value corresponds to an epoch timestamp, which makes sense because epoch timestamps are specific points in time used for programming.

I remember that epoch timestamps were introduced by Tim Berners-Lee to create a standardized way to refer to moments in time across different systems.
Each Unix timestamp is a big-endian 64-bit integer, meaning the most significant byte comes first.
So when we see numbers like 1502738019 or 604603199, they represent these timestamps.

The user then broke down the timestamp into different parts: year (four digits), month (two digits), day (two digits), hour (two digits), minute (two digits), and second (two digits).
It's important to note that each part represents a specific time unit.
So if I see 1502738019

In [25]:
# Link the external CSS file
display(HTML("<link rel='stylesheet' type='text/css' href='styles.css'>"))

# `results` is the model reply produced by the previous cell.
# Escape it so markup in the reply is shown as text, and convert newlines
# outside the f-string: a backslash inside an f-string expression is a
# SyntaxError before Python 3.12.
results_html = html.escape(results).replace('\n', '<br>')

# HTML content with the CSS class
html_content = f"""
<div class='formatted-result'>
    {results_html}
</div>
"""
# Display the styled content
display(HTML(html_content))

## Create a Python VBox widget to display the models available

In [27]:
# Apply dark mode CSS styling to the Jupyter notebook widget.
# The stylesheet is injected once via display(HTML(...)); the rules use
# !important so they override the widgets' default colours.
dark_mode_css = """
<style>
.widget-dropdown,
.widget-textarea,
.widget-button,
.jupyter-widgets {
    background-color: #2d2d2d !important;
    color: #f0f0f0 !important;
    border-color: #444 !important;
}

.widget-dropdown > select {
    background-color: #2d2d2d !important;
    color: #f0f0f0 !important;
}

.widget-label {
    color: #f0f0f0 !important;
}

.widget-textarea > textarea {
    background-color: #333 !important;
    color: #f0f0f0 !important;
    border: 1px solid #555 !important;
}

.output_area {
    background-color: #1e1e1e !important;
    border-radius: 5px;
    padding: 10px;
    margin-top: 10px;
}
</style>
"""
display(HTML(dark_mode_css))

# Define the available models (the options of the model dropdown below).
# NOTE(review): each name is passed unchanged to chat(model=...), so it
# presumably has to match a model available to the local Ollama install.
# TODO confirm the names, e.g. 'ollama-code'.
models = [
    'deepseek-r1:1.5b',
    'deepseek-r1',
    'ollama-code',
    'llama3:8b',
    'llama3:70b',
    'mistral:7b'
]

# Function to run the model with selected model name
def runmodel(input_text, model_name):
    """Ask the model ``model_name`` the question in ``input_text``.

    The prompt is sent as a single ``user`` message. The response object
    returned by ``chat`` is handed back unchanged, so the caller can read
    the message content as well as fields such as ``created_at``.
    """
    user_message = {"role": "user", "content": input_text}
    return chat(model=model_name, messages=[user_message])

# Output widget that collects everything the click handler prints
output_area = widgets.Output()

# Button that starts a model run
run_button = widgets.Button(
    tooltip='Click to run the selected model',
    button_style='primary',
    description='Run Model',
)

# Dropdown for choosing the model; the first entry of `models` is preselected
model_dropdown = widgets.Dropdown(
    description='Model:',
    options=models,
    value=models[0],
    layout=dict(width='auto'),
    style=dict(description_width='initial'),
)

# Optional free-text prompt box (disabled: the prompt is built in the click handler)
# text_input = widgets.Textarea(
#     value="Is the month of February summer?",
#     description='Prompt:',
#     layout={'width': '80%', 'height': '100px'}
# )

# Helper: turn the response timestamp into the "Created: ..." line
def _format_created_at(timestamp, tz_name="America/Chicago"):
    """Convert an RFC 3339 style timestamp string to a ``Created: ...`` line.

    Args:
        timestamp: Timestamp text such as ``2025-02-01T12:34:56.123456789Z``.
            The fraction may have any number of digits or be absent. The
            suffix may be ``Z``, a ``+hh:mm``/``-hh:mm`` offset, or missing
            (then UTC is assumed).
        tz_name: Name of the time zone the result is shown in.

    Returns:
        ``"Created: YYYY-MM-DD HH:MM:SS"`` in the requested time zone, or
        ``"Created: <raw value>"`` when the input does not look like a timestamp.
    """
    match = re.match(
        r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.(\d+))?(Z|[+-]\d{2}:\d{2})?$",
        timestamp if isinstance(timestamp, str) else "",
    )
    if match is None:
        # Unknown shape (or None): show the raw value rather than failing the run
        return f"Created: {timestamp}"

    seconds, fraction, offset = match.groups()
    # %f accepts at most 6 digits: cut nanoseconds down to microseconds and
    # pad short fractions. A fixed-width slice of the whole string breaks
    # when the fraction has fewer than 6 digits.
    micros = (fraction or "")[:6].ljust(6, "0")
    # %z understands both "Z" and "+hh:mm" (Python 3.7+), giving an aware datetime
    dt_aware = datetime.strptime(f"{seconds}.{micros}{offset or 'Z'}", "%Y-%m-%dT%H:%M:%S.%f%z")

    # Convert to the display time zone and format
    dt_local = dt_aware.astimezone(pytz.timezone(tz_name))
    return dt_local.strftime("Created: %Y-%m-%d %H:%M:%S")


# Function to handle button click
def on_button_click(b):
    """Run the selected model on the AbbVie income statement and print the reply.

    The output area is cleared first. Then the model name, the creation time
    of the response (Central Time) and the reply text are printed into it.

    Args:
        b: The button that was clicked, as passed by ipywidgets (unused).
    """
    with output_area:
        output_area.clear_output()
        selected_model = model_dropdown.value
        input_text = "Can you analyze this income statement for Abbvie based on this income statement data?\n\n" + df_isstring

        print(f"Running model: {selected_model}")
        results = runmodel(input_text, selected_model)

        created_at = _format_created_at(results.created_at)

        # One sentence per line. Keep the period, as runSmallmodel does;
        # replacing '. ' with a bare newline dropped it.
        content = results.message.content.replace('. ', '.\n')
        print(created_at)
        print("===============================================")
        print(content)

        # If you prefer to style it yourself, you can use RegEx and HTML
        # --------------------------------------------------------------
        # content = re.sub(r"\. ", ". <br>", content)                     # line break after each sentence
        # content = re.sub(r"\b(\d+)\.\s*\*\*", r"<br>\1. **", content)   # line break before "1. **", "2. **", ...
        # html_content = f"""
        # <div style="color: yellow; font-size: 20px; font-family: 'Arial', cursive, sans-serif;">
        #     <strong>Model: {selected_model} </strong><br><br>
        #     <strong>Results:</strong><br>
        #     {created_at} <br><br>
        # </div>
        # <div style="font-size: 18px; font-family: 'Arial', cursive, sans-serif; font-style: italic;">
        #     {content}
        # </div>
        # """
        # display(HTML(html_content))
        # --------------------------------------------------------------

# Register the callback function
run_button.on_click(on_button_click)

# Display the widgets. The prompt is fixed inside on_button_click and the
# text input is disabled, so the label only asks for a model selection
# instead of telling the user to enter a prompt that cannot be entered.
display(widgets.VBox([
    widgets.Label('Select a model and click "Run Model":'),
    model_dropdown,
   # text_input,
    run_button,
    output_area
]))

VBox(children=(Label(value='Select a model and enter a prompt:'), Dropdown(description='Model:', layout=Layout…