In [1]:
import json

# Load the JSON data. JSON text is UTF-8, so the encoding is passed explicitly
# instead of relying on the platform's locale-dependent default.
with open("structured_data_all.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [2]:
# Preview only the first records; echoing the whole list floods the notebook output.
data[:2]

[{'report_date': '2014-09-27',
  'section': 'profit_and_loss',
  'net_sales': {'products': None, 'services': None, 'total': 182795},
  'cost_of_sales': {'products': None, 'services': None, 'total': 112258},
  'gross_margin': 70537,
  'operating_expenses': {'research_and_development': 6041,
   'selling_general_and_administrative': 11993,
   'total': 18034},
  'operating_income': 52503,
  'other_income_or_expense_net': 980,
  'income_before_tax': 53483,
  'tax_provision': 13973,
  'net_income': 39510,
  'earnings_per_share': {'basic': 6.49, 'diluted': 6.45},
  'shares_used_in_computing_eps': {'basic': 6085572, 'diluted': 6122663}},
 {'report_date': '2015-09-26',
  'section': 'profit_and_loss',
  'net_sales': {'products': None, 'services': None, 'total': 233715},
  'cost_of_sales': {'products': None, 'services': None, 'total': 140089},
  'gross_margin': 93626,
  'operating_expenses': {'research_and_development': 8067,
   'selling_general_and_administrative': 14329,
   'total': 22396},
  '

In [3]:
def get_year(entry):
    """Return the first four characters of ``entry["report_date"]``.

    For dates written like ``'2014-09-27'`` this is the year, as a string.
    """
    report_date = entry["report_date"]
    year_prefix = report_date[:4]
    return year_prefix


In [4]:
def extract_sales_and_ebt(data):
    """Collect total net sales and income before tax, keyed by report year.

    Args:
        data: Iterable of report entries. Each entry is indexed with
            ``"report_date"`` (via ``get_year``), ``"net_sales"`` -> ``"total"``
            and ``"income_before_tax"``.

    Returns:
        A ``(sales, ebt)`` tuple of dicts keyed by the year string. If several
        entries map to the same year, the last one wins.
    """
    sales_by_year, ebt_by_year = {}, {}
    for record in data:
        fiscal_year = get_year(record)
        sales_by_year.update({fiscal_year: record["net_sales"]["total"]})
        ebt_by_year.update({fiscal_year: record["income_before_tax"]})
    return sales_by_year, ebt_by_year

In [5]:
def calculate_growth(data_dict):
    """Compute the percentage change between values at consecutive sorted keys.

    Keys are sorted and each value is compared with the value at the key that
    sorts immediately before it. Gaps between periods are not detected.

    Args:
        data_dict: Mapping of period label (e.g. a year string) to a number or
            ``None``.

    Returns:
        Dict mapping each period except the first to its change in percent
        relative to the preceding period. A period is omitted when the
        preceding value is zero or ``None`` (no usable base) or when its own
        value is ``None``.
    """
    sorted_years = sorted(data_dict)
    growth = {}
    for prev_year, curr_year in zip(sorted_years, sorted_years[1:]):
        prev_val = data_dict[prev_year]
        curr_val = data_dict[curr_year]
        # Skip pairs without a usable base (zero/None) or without a current
        # value; subtracting from None would raise TypeError.
        if not prev_val or curr_val is None:
            continue
        # Divide by the magnitude of the base so that an improvement from a
        # negative base (e.g. a shrinking loss) is reported as positive growth.
        growth[curr_year] = ((curr_val - prev_val) / abs(prev_val)) * 100
    return growth

In [6]:
def print_growth(growth_dict, label):
    """Print a blank line, a ``"<label> Growth (% YoY):"`` header, then one
    ``"<year>: <value>%"`` line per entry in ascending key order, with the
    value formatted to two decimal places.
    """
    header = f"\n{label} Growth (% YoY):"
    print(header)
    for year in sorted(growth_dict):
        pct = growth_dict[year]
        print(f"{year}: {pct:.2f}%")


In [7]:
# Pull the two yearly series out of the loaded entries.
sales_dict, ebt_dict = extract_sales_and_ebt(data)

# Same growth calculation applied to each series, in the same order as before.
sales_growth, ebt_growth = map(calculate_growth, (sales_dict, ebt_dict))

# Report sales first, then earnings before tax.
print_growth(sales_growth, "Sales")
print_growth(ebt_growth, "EBT")


Sales Growth (% YoY):
2015: 27.86%
2016: -7.73%
2017: 6.30%
2018: 15.86%
2019: -2.04%
2020: 5.51%
2021: 33.26%
2022: 7.79%
2023: -2.80%
2024: 2.02%

EBT Growth (% YoY):
2015: 35.59%
2016: -15.37%
2017: 4.43%
2018: 13.75%
2019: -9.83%
2020: 2.06%
2021: 62.77%
2022: 9.06%
2023: -4.51%
2024: 8.57%


In [1]:
import dotenv
from dotenv import load_dotenv

In [4]:
ls

README.md                 documents_04_06/          structured_data_all.json
__pycache__/              financial_model.py        tools/
app.py                    financial_test.ipynb      websearchtool.ipynb
chain_scoo.py             key.env
chaining_csoo.ipynb       myenv/


In [5]:
# Load settings from the local key.env file (a later cell looks up
# GEMINI_API_KEY via os.getenv). As the cell's last expression, the call's
# return value is what the notebook displays.
load_dotenv("key.env")

True

In [7]:
import os

# Only confirm that the key is present. Evaluating os.getenv(...) as the cell's
# last expression would echo the secret into the notebook's saved output.
"GEMINI_API_KEY" in os.environ

[output cleared: this cell echoed the GEMINI_API_KEY value; treat that key as compromised and rotate it]

In [8]:
def flatten_json(json_obj, parent_key='', sep='_'):
    """Collapse nested dicts into one flat dict with joined key paths.

    Args:
        json_obj: Dict to flatten. Only values that are themselves dicts are
            descended into; every other value (lists included) is kept as is.
        parent_key: Prefix for every produced key; when empty, top-level keys
            are used unchanged.
        sep: Separator placed between the prefix and each key.

    Returns:
        A new dict whose keys are the ``sep``-joined paths to each non-dict
        value. An empty nested dict contributes no keys.
    """
    flat = {}
    for key, value in json_obj.items():
        path = parent_key + sep + key if parent_key else key
        if not isinstance(value, dict):
            flat[path] = value
            continue
        # Recurse with the accumulated path as the new prefix.
        flat.update(flatten_json(value, path, sep=sep))
    return flat

In [9]:
import json
# JSON text is UTF-8; pass the encoding explicitly instead of relying on the
# platform's locale-dependent default.
with open("structured_data_all.json", "r", encoding="utf-8") as f:
    data = json.load(f)

if isinstance(data, list):
    # Flatten each record in the list.
    flattened_data = [flatten_json(record) for record in data]
else:
    # A single top-level object flattens to one dict.
    flattened_data = flatten_json(data)

# Print one flattened record per line. A single flattened dict is wrapped in a
# list first: iterating the dict directly would print only its keys.
printable_records = flattened_data if isinstance(flattened_data, list) else [flattened_data]
for item in printable_records:
    print(item)

{'report_date': '2014-09-27', 'section': 'profit_and_loss', 'net_sales_products': None, 'net_sales_services': None, 'net_sales_total': 182795, 'cost_of_sales_products': None, 'cost_of_sales_services': None, 'cost_of_sales_total': 112258, 'gross_margin': 70537, 'operating_expenses_research_and_development': 6041, 'operating_expenses_selling_general_and_administrative': 11993, 'operating_expenses_total': 18034, 'operating_income': 52503, 'other_income_or_expense_net': 980, 'income_before_tax': 53483, 'tax_provision': 13973, 'net_income': 39510, 'earnings_per_share_basic': 6.49, 'earnings_per_share_diluted': 6.45, 'shares_used_in_computing_eps_basic': 6085572, 'shares_used_in_computing_eps_diluted': 6122663}
{'report_date': '2015-09-26', 'section': 'profit_and_loss', 'net_sales_products': None, 'net_sales_services': None, 'net_sales_total': 233715, 'cost_of_sales_products': None, 'cost_of_sales_services': None, 'cost_of_sales_total': 140089, 'gross_margin': 93626, 'operating_expenses_rese

In [10]:
# JSON text is UTF-8; pass the encoding explicitly instead of relying on the
# platform's locale-dependent default.
with open("output.json", "r", encoding="utf-8") as f:
    data = json.load(f)

if isinstance(data, list):
    # Flatten each record in the list.
    flattened_data = [flatten_json(record) for record in data]
else:
    # A single top-level object flattens to one dict.
    flattened_data = flatten_json(data)

# Print one flattened record per line. A single flattened dict is wrapped in a
# list first: iterating the dict directly would print only its keys.
printable_records = flattened_data if isinstance(flattened_data, list) else [flattened_data]
for item in printable_records:
    print(item)

{'Metric': 'sales growth', '2014': 'N/A', '2015': '27.86%', '2016': '-7.73%', '2017': '6.3%', '2018': '15.86%', '2019': '-2.04%', '2020': '5.51%'}
{'Metric': 'gross margin', '2014': '61.41%', '2015': '59.94%', '2016': '60.92%', '2017': '61.53%', '2018': '61.66%', '2019': '62.18%', '2020': '61.77%'}
{'Metric': 'COGS_perc_sales', '2014': '38.59%', '2015': '40.06%', '2016': '39.08%', '2017': '38.47%', '2018': '38.34%', '2019': '37.82%', '2020': '38.23%'}
{'Metric': 'rnd_perc_sales', '2014': '3.3%', '2015': '3.45%', '2016': '4.66%', '2017': '5.05%', '2018': '5.36%', '2019': '6.23%', '2020': '6.83%'}
{'Metric': 'rnd_growth', '2014': 'N/A', '2015': '4.44%', '2016': '34.96%', '2017': '8.45%', '2018': '6.1%', '2019': '16.29%', '2020': '9.59%'}
{'Metric': 'sng_perc_sales', '2014': '6.56%', '2015': '6.13%', '2016': '6.58%', '2017': '6.66%', '2018': '6.29%', '2019': '7.01%', '2020': '7.25%'}
{'Metric': 'income_margin', '2014': '28.72%', '2015': '30.48%', '2016': '27.84%', '2017': '26.76%', '2018'