### DB Analysis

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

In [2]:
df = pd.read_csv("../data/1_work_data/processed_data/control.csv")

In [3]:
missing_values = df.isnull().sum().sort_values(ascending=False)
print("\nMissing Values Count:\n", missing_values)


Missing Values Count:
 ltda_yoy     44
ni_2y        24
ev_eb_yoy    21
pe_yoy       21
freturn      15
             ..
n_sales       0
val_purch     0
n_purch       0
tdq           0
sector        0
Length: 103, dtype: int64


In [4]:
ins_df = db_handler.fetch_insider_data("AMZN")
ins_df

tic,filling_date,trade_date,owner_name,title,transaction_type,last_price,qty,shares_held,owned,value
str,date,date,str,str,str,str,str,str,str,str
"""AMZN""",2005-08-03,2005-08-01,"""Alberg Tom A""","""Dir""","""S - Sale""","""$44.81""","""-33,470""","""195,865""","""-15%""","""-$1,499,791"""
"""AMZN""",2005-08-03,2005-08-01,"""Kilar Jason""","""SVP""","""S - Sale+OE""","""$44.75""","""-15,000""","""30,187""","""-33%""","""-$671,181"""
"""AMZN""",2005-08-03,2005-08-01,"""Szkutak Thomas J""","""SVP, CFO""","""S - Sale""","""$44.68""","""-30,000""","""28,015""","""-52%""","""-$1,340,256"""
"""AMZN""",2005-08-03,2005-08-01,"""Wilke Jeffrey A""","""SVP""","""S - Sale+OE""","""$45.05""","""-33,750""","""40,149""","""-46%""","""-$1,520,384"""
"""AMZN""",2005-08-03,2005-08-01,"""Wilson L Michelle""","""SVP""","""S - Sale+OE""","""$44.77""","""-25,000""","""32,395""","""-44%""","""-$1,119,193"""
…,…,…,…,…,…,…,…,…,…,…
"""AMZN""",2024-08-23,2024-08-21,"""Herrington Douglas J""","""CEO Worldwide Amazon Stores""","""S - Sale+OE""","""$181.20""","""-6,104""","""527,738""","""-1%""","""-$1,106,048"""
"""AMZN""",2024-08-23,2024-08-21,"""Jassy Andrew R""","""Pres, CEO""","""S - Sale+OE""","""$180.77""","""-20,784""","""2,131,929""","""-1%""","""-$3,757,109"""
"""AMZN""",2024-08-23,2024-08-21,"""Olsavsky Brian T""","""SVP, CFO""","""S - Sale+OE""","""$180.00""","""-14,600""","""50,562""","""-22%""","""-$2,628,000"""
"""AMZN""",2024-08-23,2024-08-21,"""Reynolds Shelley""","""VP""","""S - Sale+OE""","""$181.04""","""-3,791""","""124,117""","""-3%""","""-$686,331"""


### Web scraping quick tests

In [3]:
# scrape GP and COST - > check date errors
import yfinance as yf

t = yf.Ticker("MSFT")

data = t.history(start="2005-01-01").reset_index(drop=False)
data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2005-01-03 00:00:00-05:00,18.635157,18.739460,18.530856,18.593437,65002900,0.0,0.0
1,2005-01-04 00:00:00-05:00,18.683830,18.843759,18.537808,18.662970,109442100,0.0,0.0
2,2005-01-05 00:00:00-05:00,18.662968,18.843757,18.607341,18.621248,72463500,0.0,0.0
3,2005-01-06 00:00:00-05:00,18.669920,18.815941,18.523898,18.600386,76890500,0.0,0.0
4,2005-01-07 00:00:00-05:00,18.649061,18.697734,18.509993,18.544760,68723300,0.0,0.0
...,...,...,...,...,...,...,...,...
4951,2024-09-05 00:00:00-04:00,407.619995,413.100006,406.130005,408.390015,14195500,0.0,0.0
4952,2024-09-06 00:00:00-04:00,409.059998,410.649994,400.799988,401.700012,19609500,0.0,0.0
4953,2024-09-09 00:00:00-04:00,407.239990,408.649994,402.149994,405.720001,15295100,0.0,0.0
4954,2024-09-10 00:00:00-04:00,408.200012,416.329987,407.700012,414.200012,19594300,0.0,0.0


In [4]:
t.financials

Unnamed: 0,2023-12-31,2022-12-31,2021-12-31,2020-12-31,2019-12-31
Tax Effect Of Unusual Items,0.0,0.0,0.0,0.0,
Tax Rate For Calcs,0.083158,0.21,0.27,0.010717,
Normalized EBITDA,273915000.0,-334447000.0,-469957000.0,-1151017000.0,
Total Unusual Items,,,,811000.0,-3000.0
Total Unusual Items Excluding Goodwill,,,,811000.0,-3000.0
Net Income From Continuing Operation Net Minority Interest,209825000.0,-373705000.0,-520379000.0,-1166391000.0,
Reconciled Depreciation,33354000.0,22522000.0,14897000.0,13871000.0,
Reconciled Cost Of Revenue,431105000.0,408549000.0,339404000.0,352547000.0,
EBITDA,273915000.0,-334447000.0,-469957000.0,-1151017000.0,
EBIT,240561000.0,-356969000.0,-484854000.0,-1164888000.0,


## Intro to Polars

In [1]:
import polars as pl
import numpy as np

num_rows = 5000
rng = np.random.default_rng(seed=7)

buildings_data = {
    "sqft": rng.exponential(scale=1000, size=num_rows),
    "year": rng.integers(low=1995, high=2023, size=num_rows),
    "building_type": rng.choice(["A", "B", "C"], size=num_rows),
}

buildings = pl.DataFrame(buildings_data)
buildings

sqft,year,building_type
f64,i64,str
707.529256,1996,"""C"""
1025.203348,2020,"""C"""
568.548657,2012,"""A"""
895.109864,2000,"""A"""
206.532754,2011,"""A"""
…,…,…
710.435755,2003,"""C"""
408.872783,2009,"""C"""
57.562059,2019,"""C"""
3728.088949,2020,"""C"""


In [4]:
buildings.schema

Schema([('sqft', Float64), ('year', Int64), ('building_type', String)])

In [6]:
buildings.select(["sqft", "year"])

sqft,year
f64,i64
707.529256,1996
1025.203348,2020
568.548657,2012
895.109864,2000
206.532754,2011
…,…
710.435755,2003
408.872783,2009
57.562059,2019
3728.088949,2020


In [8]:
buildings.filter(pl.col("year") > 2015).select(pl.col("year").min())

year
i64
2016


In [4]:
# Sample DataFrame
df = pl.DataFrame({
    "category": ["A", "A", "B", "B", "C"],
    "value": [1, 2, 3, 4, 5]
})

# Define a complex function to apply to each group
def complex_function(group_df):
    # For demonstration, we'll just calculate the mean of the 'value' column for each group
    return group_df.with_columns(pl.col("value").mean().alias("mean_value"))

# Group by 'category' and apply the complex function
result = df.group_by("category").agg(complex_function)

print(result)

TypeError: cannot create expression literal for value of type function: <function complex_function at 0x12d4edbd0>

Hint: Pass `allow_object=True` to accept any value and create a literal of type Object.