In [4]:
import os
import csv

# Settings
# Relative path of the CSV file that each question cell below opens and reads.
DATA_FILE = os.path.join('data', '2017.csv')
# Number of highest-ranked entries requested from get_ntop_items() when
# ranking exchanges and companies.
NUM_OF_TOP_ITEMS = 3

# Utility functions
def get_ntop_items(ntop_items, datadict):
    """Return the ``ntop_items`` entries of ``datadict`` with the largest values.

    Args:
        ntop_items: How many entries to keep.
        datadict: Mapping from a key to a numeric value (the value is negated
            to build the sort key, so it must support unary minus).

    Returns:
        A new dict holding the selected entries, ordered from the largest
        value to the smallest. Entries with equal values keep the relative
        order they had in ``datadict``.

    Raises:
        ValueError: If ``datadict`` holds fewer than ``ntop_items`` entries.
    """
    if len(datadict) < ntop_items:
        raise ValueError("Not enough items in data dictionary.")

    def descending_value(pair):
        # Negating the value makes the ascending sort yield largest-first.
        return -pair[1]

    ranked_pairs = sorted(datadict.items(), key=descending_value)
    return dict(ranked_pairs[:ntop_items])

## Question 1: What exchange had the most transactions in the file?

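### Answer: 'LSE (UK)' (3,146 transactions). 'off exchange' has the highest row count (9,588), but it is not an exchange.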

In [8]:
# Solution to Question 1
# Tally how many rows of the file carry each value of the 'exchange' column.

exchange_summary = {}
with open(DATA_FILE, newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        exchange_name = row['exchange']
        # A name seen for the first time starts from 0 before being counted.
        exchange_summary[exchange_name] = exchange_summary.get(exchange_name, 0) + 1

# Keep only the most frequent values and display them as the cell result.
top_exchanges = get_ntop_items(NUM_OF_TOP_ITEMS, exchange_summary)
top_exchanges

{'off exchange': 9588, 'LSE (UK)': 3146, 'Euronext Paris (France)': 2889}

## Question 2: In August 2017, which company had the highest combined value in EUR (`valueEUR`)?
### Answer: 'HSBC HLDGS PLC'

In [7]:
# Solution to Question 2
# Add up valueEUR per company, counting only the rows whose tradedate falls
# in August 2017.

company_summary = {}
with open(DATA_FILE, newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        trade_date = row['tradedate']
        # Dates are compared as text; this presumably relies on a YYYYMMDD
        # layout in the file -- TODO confirm against the data.
        if not ('20170801' <= trade_date < '20170901'):
            continue
        company_name = row['companyName']
        company_value = float(row['valueEUR'])
        if company_name in company_summary:
            company_summary[company_name] += company_value
        else:
            company_summary[company_name] = company_value

# Top 3 companies
top_companies = get_ntop_items(NUM_OF_TOP_ITEMS, company_summary)
top_companies

{'HSBC HLDGS PLC': 3311729433.4614997,
 'EUROSIC SA': 1310917260.0,
 'CHRISTIAN DIOR SE': 228900489.802}

## Question 3: For 2017, only considering transactions with tradeSignificance 3, what is the percentage of transactions per month?

In [14]:
# Solution to Question 3
# For every month of 2017, count all trades and the trades whose
# tradeSignificance equals 3, then print two percentages per month:
#   1. the share of a month's trades that have significance 3, and
#   2. the share of all significance-3 trades that fall in that month.

# Year prefix of the month labels; it matches the date filter applied below.
trade_year = '2017'

# Maps 'YYYY.MM' -> {'trades': row count, 'trades_s3': significance-3 row count}.
trade_summary = {}
with open(DATA_FILE, newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        trade_date = str(row['tradedate'])
        # Dates are compared as text; this presumably relies on a YYYYMMDD
        # layout in the file -- TODO confirm against the data.
        if not ('20170101' <= trade_date < '20180101'):
            continue
        trade_year_month = trade_year + '.' + trade_date[4:6]
        trades_s3 = 1 if int(row['tradeSignificance']) == 3 else 0
        month_counts = trade_summary.setdefault(
            trade_year_month, {'trades': 0, 'trades_s3': 0}
        )
        month_counts['trades'] += 1
        month_counts['trades_s3'] += trades_s3

# Every month present was created by at least one row, so 'trades' is never 0.
for month_counts in trade_summary.values():
    month_counts['s3_pct'] = round(
        month_counts['trades_s3'] / month_counts['trades'] * 100, 2
    )

# Percentage of S3 operations from total
print("\n*** Percentage of S3 operations from total operations (in 2017).")
for k, v in sorted(trade_summary.items()):
    print(f"{k}: {v['s3_pct']} %")

# Distribution of S3 Operations by Month in 2017
print("\n*** Distribution of S3 operations by month (in 2017)")
total_s3 = sum(val['trades_s3'] for val in trade_summary.values())
for k, v in sorted(trade_summary.items()):
    # If the file has no significance-3 trades at all there is nothing to
    # distribute; report 0.0 instead of raising ZeroDivisionError.
    s3_monthly_pct = round(v['trades_s3'] / total_s3 * 100, 2) if total_s3 else 0.0
    print(f"{k}: {s3_monthly_pct} %")


*** Percentage of S3 operations from total operations (in 2017).
2017.01: 59.29 %
2017.02: 49.45 %
2017.03: 45.03 %
2017.04: 45.2 %
2017.05: 50.82 %
2017.06: 53.08 %
2017.07: 51.81 %
2017.08: 70.52 %
2017.09: 64.35 %
2017.10: 59.69 %
2017.11: 57.47 %
2017.12: 55.54 %

*** Distribution of S3 operations by month (in 2017)
2017.01: 7.62 %
2017.02: 8.41 %
2017.03: 12.67 %
2017.04: 7.27 %
2017.05: 10.98 %
2017.06: 1.09 %
2017.07: 0.4 %
2017.08: 8.78 %
2017.09: 12.59 %
2017.10: 10.01 %
2017.11: 11.23 %
2017.12: 8.96 %
