<a href="https://colab.research.google.com/github/dominikjanyga/network-analysis/blob/main/1_network_analysis_data_collection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Loading libraries.
In the first step, I imported the libraries used to collect the data. I used pandas to read HTML tables from the https://stooq.pl/ site. Next, using the yfinance library, I downloaded financial information on Polish listed companies.



In [None]:
import pandas as pd
import yfinance as yf

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the CSV exports in this notebook are
# written to paths under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 2. Stooq data.
Here, I use `pd.read_html` to read the HTML tables from the stooq webpage. The goal is to obtain the ticker symbols of all companies listed on the Warsaw Stock Exchange and collect them in a single list.

In [None]:
# Walk through pages l=1..8 of the stooq.pl `s=wig` listing and gather the
# values found in the first column of each page's table.
ticker_list = []

for page_number in range(1, 9):
    listing_tables = pd.read_html(f"https://stooq.pl/q/i/?s=wig&l={page_number}")
    # The second table on the page is used; its first five rows and its last
    # row are skipped (presumably page chrome rather than tickers - TODO confirm).
    symbols_on_page = listing_tables[1].iloc[5:-1, 0].tolist()
    ticker_list += symbols_on_page

print(ticker_list)
len(ticker_list)

['06N', '11B', '1AT', '3RG', 'AAT', 'ABE', 'ABS', 'ACG', 'ACP', 'ACT', 'AGO', 'AGT', 'ALE', 'ALI', 'ALL', 'ALR', 'AMB', 'AMC', 'ANR', 'APE', 'APN', 'APR', 'APT', 'ARH', 'ART', 'ASB', 'ASE', 'AST', 'ATC', 'ATD', 'ATG', 'ATP', 'ATR', 'ATS', 'ATT', 'AWM', 'B24', 'BBD', 'BBT', 'BCM', 'BCS', 'BCX', 'BDX', 'BDZ', 'BFT', 'BHW', 'BIO', 'BIP', 'BLO', 'BMC', 'BMX', 'BNP', 'BOS', 'BOW', 'BRS', 'CAP', 'CAR', 'CAV', 'CBF', 'CCC', 'CDL', 'CDR', 'CEZ', 'CIG', 'CLC', 'CLD', 'CLE', 'CLN', 'CMP', 'COG', 'CPL', 'CPR', 'CPS', 'CRI', 'CRJ', 'CRM', 'CSR', 'CTX', 'DAD', 'DAT', 'DBE', 'DCR', 'DEK', 'DEL', 'DGA', 'DGE', 'DIG', 'DNP', 'DOM', 'DVL', 'EAH', 'EAT', 'ECH', 'EHG', 'EKP', 'ELT', 'ENA', 'ENE', 'ENI', 'ENT', 'EQU', 'ERB', 'ERG', 'ETL', 'EUR', 'FAB', 'FEE', 'FMG', 'FON', 'FRO', 'FSG', 'FTE', 'GEA', 'GIF', 'GKI', 'GMT', 'GOP', 'GPP', 'GPW', 'GRN', 'GRX', 'GTC', 'GTN', 'HDR', 'HEL', 'HRP', 'HUG', 'ICE', 'IFI', 'IIA', 'IMC', 'IMS', 'INC', 'ING', 'INK', 'INL', 'INP', 'IPE', 'IPO', 'ITB', 'IZO', 'IZS', 'JRH'

320

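In order to look the companies up with yfinance, I append the `.WA` suffix to every ticker symbol collected from stooq.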

In [None]:
# Suffix appended to every stooq symbol before it is handed to yfinance.
TICKER_SUFFIX = ".WA"
tickers = [symbol + TICKER_SUFFIX for symbol in ticker_list]

I use yfinance to look up company details such as `sector`, `industry`, `companyOfficers`, `shortName`, `longName`, etc.

In [None]:
# Display the full `info` dictionary of a single ticker to see which fields
# are available before collecting them for every company.
# NOTE(review): the symbol is lower-case here ('06n.WA') while the scraped
# list holds upper-case symbols ('06N') - confirm both resolve to the same company.
yf.Ticker('06n.WA').info

{'address1': 'Grzybowska 4 lok. 96',
 'city': 'Warsaw',
 'zip': '00-131',
 'country': 'Poland',
 'phone': '48 22 630 7700',
 'fax': '48 22 630 7701',
 'website': 'https://www.magnapolonia.com.pl',
 'industry': 'Asset Management',
 'sector': 'Financial Services',
 'longBusinessSummary': 'Magna Polonia S.A., formerly known as NFI Magna Polonia S.A., is a private equity and venture capital firm specializing in investments in seed, start-up, growth capital, expansion, restructuring, and bridge financing. It typically invests in telecommunications, broadcasting, infrastructure, Internet, intelligent installations and automation, energy and information technology companies. The firm prefers to invest in Central and Eastern Europe with a focus on Poland. It prefers to invest between \x800.5 million ($0.65 million) and \x8010 million ($13.05 million) in its portfolio companies. Magna Polonia S.A. is based in Warsaw, Poland.',
 'fullTimeEmployees': 40,
 'companyOfficers': [{'maxAge': 1,
   'nam

In [None]:
# Build one record per ticker from the yfinance `info` dictionary and turn
# the records into a DataFrame. Any field missing from `info` is stored as
# the string 'No data'.
company_data = []

for ticker in tickers:
    info = yf.Ticker(ticker).info

    company_data.append({
        'longName': info.get('longName', 'No data'),
        'shortName': info.get('shortName', 'No data'),
        'ticker': ticker,
        'sector': info.get('sector', 'No data'),
        'industry': info.get('industry', 'No data'),
        'returnOnEquity': info.get('returnOnEquity', 'No data'),
        'p_to_book': info.get('priceToBook', 'No data'),
        'debt_to_eq': info.get('debtToEquity', 'No data'),
        'beta': info.get('beta', 'No data'),
        # Return on assets is stored as the last column so that the columns
        # listed above keep their positions in the exported file.
        'returnOnAssets': info.get('returnOnAssets', 'No data'),
    })

company_df = pd.DataFrame(company_data)

In [None]:
# Write the company-level table to Google Drive without the index column.
# NOTE(review): the target file name has no ".csv" extension - confirm this is
# what the downstream step expects.
company_df.to_csv("/content/drive/MyDrive/Projects/network-analysis/company_financials_28102024", index=False)

# 3. Creating a dataframe with company officers data.
Next, for each listed company I downloaded the information on its company officers (name and title).

In [None]:
# Build one record per (officer, company) pair from the `companyOfficers`
# entry of each ticker's yfinance `info` dictionary.
OFFICER_COLUMNS = ['officer_name', 'officer_title', 'ticker']
officers_data = []

for ticker in tickers:
    info = yf.Ticker(ticker).info
    # The key may be missing, empty, or explicitly None; `or []` turns all of
    # these into "no officers" instead of failing in the loop below.
    company_officers = info.get('companyOfficers') or []

    for officer in company_officers:
        officers_data.append({
            'officer_name': officer.get('name', 'No data'),
            'officer_title': officer.get('title', 'No data'),
            'ticker': ticker,
        })

# Explicit columns keep the schema stable even when no officer was found.
officer_df = pd.DataFrame(officers_data, columns=OFFICER_COLUMNS)
officer_df

Unnamed: 0,officer_name,officer_title,ticker
0,Mr. Miroslaw Janisiewicz,President of the Management Board,06N.WA
1,Mr. Przemyslaw Piotr Marszal,President of the Management Board,11B.WA
2,Mr. Grzegorz Miechowski,Member of Management Board,11B.WA
3,Mr. Michal Wojciech Drozdowski,Member of the Management Board,11B.WA
4,Mr. Pawel Feldman,Member of the Management Board,11B.WA
...,...,...,...
1592,Mr. Wieslaw Nowak,President of the Management Board & CEO,ZUE.WA
1593,Mr. Marcin Wisniewski,Vice President of the Management Board & Direc...,ZUE.WA
1594,Mr. Maciej Nowak,Vice President of Management Board and Legal &...,ZUE.WA
1595,Mr. Jerzy Czeremuga,Vice President of the Management Board & Direc...,ZUE.WA


In [17]:
# Write the officers table to Google Drive without the index column.
# NOTE(review): the target file name has no ".csv" extension - confirm this is
# what the downstream step expects.
officer_df.to_csv("/content/drive/MyDrive/Projects/network-analysis/company_officers_28102024", index=False)

# 4. Shareholder data
For this step I am using the `ticker_list` built in step 2.

In [None]:
# One stooq.pl `q/h` page URL per symbol, in the same order as ticker_list.
urls = [f'https://stooq.pl/q/h/?s={company}' for company in ticker_list]

In [None]:
# Download the second HTML table of every URL and tag it with the symbol
# found at the same position in ticker_list.
wig_tables = []
for position, page_url in enumerate(urls):
    # `.assign` returns a new frame with the extra 'ticker' column.
    tagged_table = pd.read_html(page_url)[1].assign(ticker=ticker_list[position])
    wig_tables.append(tagged_table)
wig_tables

[                                                   Lp  \
 0                                                 NaN   
 1                                                 NaN   
 2                                                 NaN   
 3                                                 NaN   
 4                                                 NaN   
 5                                                   1   
 6                                               Razem   
 7                        · Cena nominalna akcji: 1.00   
 8                                · Free Float: 75.54%   
 9         · Liczba wszystkich akcji: 13,921,975 akcji   
 10           · Akcje uprzywilejowane co do głosu: nie   
 11  document.write(Modernizr.svg?'<svg xmlns="http...   
 
                                          Akcjonariusz Udział w kapitale  \
 0                                                 NaN               NaN   
 1   Ulubione GPW, WIG20, Akcje Indeksy, Azja, Euro...               NaN   
 2              

In [None]:
# Stack the per-ticker tables into a single DataFrame; ignore_index=True
# discards the individual indexes and numbers the rows anew.
final_wig_table = pd.concat(wig_tables, ignore_index=True)

In [None]:
# Convert the 'Lp' column to numbers; errors='coerce' turns every value that
# cannot be parsed (text rows scraped along with the table) into NaN.
final_wig_table['Lp'] = pd.to_numeric(final_wig_table['Lp'], errors='coerce')

In [None]:
# Keep only the rows whose 'Lp' value is present, i.e. drop every row where
# 'Lp' is NaN.
final_wig_table = final_wig_table.dropna(subset=['Lp'])

In [None]:
# Write the shareholder table to Google Drive without the index column.
# NOTE(review): the target file name has no ".csv" extension - confirm this is
# what the downstream step expects.
final_wig_table.to_csv("/content/drive/MyDrive/Projects/network-analysis/shareholder_data_15102024", index=False)

# 5. The end
I saved the data in CSV format. It will be transformed in the next step.