# Libraries

In [1]:
# Core Libraries 
import pandas as pd
import numpy as np
import requests

# For Pattern
import re

* Libraries Version 

In [2]:
print(f"Pandas Version: {pd.__version__} \n ----")
print(f"Numpy Version: {np.__version__}\n ----")
print(f"Re Version: {re.__version__}\n ----")
print(f"Requests Version: {requests.__version__}")

Pandas Version: 2.0.2 
 ----
Numpy Version: 1.22.3
 ----
Re Version: 2.2.1
 ----
Requests Version: 2.31.0


# Alpha Vantage Data Set

!! Please Before the run, receive your api key form website and setup api key inside of .env file

[AlphaVantage Website](https://www.alphavantage.co/documentation/)

* Also, you can check Api Key Requests

## API KEY Configuration

* API keys hidden for security !!!

In [3]:
#for Api Key --- AlphaVantage Api Key  (https://www.alphavantage.co/)
from dotenv import load_dotenv
import os

def configure():
    load_dotenv()


**Company Information**

In [4]:
# IBM = International Business Machines Corporation
Company_info_url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol=IBM&apikey={os.getenv("Alphavantage_key")}'
r_info = requests.get(Company_info_url)

IBM = r_info.json()

print(IBM)

{'Symbol': 'IBM', 'AssetType': 'Common Stock', 'Name': 'International Business Machines', 'Description': 'International Business Machines Corporation (IBM) is an American multinational technology company headquartered in Armonk, New York, with operations in over 170 countries. The company began in 1911, founded in Endicott, New York, as the Computing-Tabulating-Recording Company (CTR) and was renamed International Business Machines in 1924. IBM is incorporated in New York. IBM produces and sells computer hardware, middleware and software, and provides hosting and consulting services in areas ranging from mainframe computers to nanotechnology. IBM is also a major research organization, holding the record for most annual U.S. patents generated by a business (as of 2020) for 28 consecutive years. Inventions by IBM include the automated teller machine (ATM), the floppy disk, the hard disk drive, the magnetic stripe card, the relational database, the SQL programming language, the UPC barcod

Data Frame of Companies Information

In [4]:
df = pd.DataFrame(IBM.keys())
df.rename(columns={0:"Title"},inplace= True)

words = df["Title"]

words_list = []

for i in range(0,len(words)):
    list_words = words[i]

    pattern = "[A-Z][^A-Z]*"

    x = re.findall(pattern, list_words)
    x_new = " ".join(x).title()
    words_list.append(x_new)

df["Title"] = words_list
df.rename(columns={"index":"Title"}, inplace= True)


values = pd.DataFrame(IBM.values())

df["Values"] = values
df

Unnamed: 0,Title,Values
0,Symbol,IBM
1,Asset Type,Common Stock
2,Name,International Business Machines
3,Description,International Business Machines Corporation (I...
4,C I K,51143
5,Exchange,NYSE
6,Currency,USD
7,Country,USA
8,Sector,TECHNOLOGY
9,Industry,COMPUTER & OFFICE EQUIPMENT


**Company News**

In [None]:
# BA = The Boeing Company

Company_news_url = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=BA&apikey={os.getenv("Alphavantage_key")}'
r_news = requests.get(Company_news_url)
BA = r_news.json()

news = pd.DataFrame(BA.get("feed"))

newsx = news[["title","url"]]
newsx.columns = ["News","News_Url"] 
newsx.head()

## Alpha Vantage Financial Statements 

1. FS[0] = Income Statement

2. FS[1] = Balance Sheet

3. FS[2] = Cashflow

In [None]:

FS = ["INCOME_STATEMENT","BALANCE_SHEET","CASH_FLOW"]   # Financial Statements

data = []
IS = {}                                                 # Income Statements
BS = {}                                                 # Balance Sheet
CF = {}                                                 # Cash Flow


for i in FS:
    url = f'https://www.alphavantage.co/query?function={i}&symbol=IBM&apikey={os.getenv("Alphavantage_key")}'
    r = requests.get(url)
    data.append(r.json())

    data_df = pd.DataFrame(data)
    
    df = {}
    for i in np.arange(0, len(data_df)):
        df[i] = data_df.iloc[i].get("annualReports")

IS = pd.DataFrame(df[0]).T.reset_index()
BS = pd.DataFrame(df[1]).T.reset_index()
CF = pd.DataFrame(df[2]).T.reset_index()

FS = [IS,BS,CF]     # Combine all statements to one frame

IS_fiscal_Date = IS.iloc[0].to_list()
IS_index = IS.iloc[1].to_list()

FS_pattern = []

for i in np.arange(0, len(FS)):             # Removing unusefull words
    FS[i].replace("None",0,inplace = True)
    FS[i].fillna(0)

    FS[i].copy
    FS[i].loc[2:,[0,1,2,3,4]] = FS[i].loc[2:,[0,1,2,3,4]].astype("float64")
    
    FS[i].loc[:1] = IS_fiscal_Date
    FS[i].loc[1] = IS_index
    
    FS_pattern.append(FS[i]["index"])

pattern_list = pd.DataFrame()

one = FS_pattern[0]
two = FS_pattern[1]
three = FS_pattern[2]

pat_list = [one,two,three]      # Making Capitilazation of words

pattern = '[A-Z][^A-Z]*|[A-Z]*[^A-Z][^A-Z]*|[A-Z]*[^A-Z][^A-Z][^A-Z]*|[A-Z]*[^A-Z][^A-Z][^A-Z][^A-Z]*'

FS_0_join = []
for i in np.arange(0, len(one)):
    FS_0_pattern = one[i]
    FS_0_findall = re.findall(pattern,FS_0_pattern)
    FS_0_join.append(" ".join(FS_0_findall).title())
FS[0]["index"] = FS_0_join

FS_1_join = []
for z in np.arange(0, len(two)):
    FS_1_pattern = two[z]
    FS_1_findall = re.findall(pattern,FS_1_pattern)
    FS_1_join.append(" ".join(FS_1_findall).title())
FS[1]["index"] = FS_1_join

FS_2_join = []
for q in np.arange(0, len(three)):
    FS_2_pattern = three[q]
    FS_2_findall = re.findall(pattern,FS_2_pattern)
    FS_2_join.append(" ".join(FS_2_findall).title())
FS[2]["index"] = FS_2_join

index_names = ["INCOME STATEMENT","BALANCE SHEET","CASH FLOW"]  # Combine all clean statements to one frame

for i in np.arange(0,3):
    FS[i].columns = IS_fiscal_Date
    FS[i] = FS[i].drop(0)

    FS[i].index.name = index_names[i]

display(FS[0])
display(FS[1])
display(FS[2])