In [1]:
# get api key from .env file
import os
from dotenv import load_dotenv

# load variables from the local .env file, then read the key from the environment
load_dotenv()
API_KEY = os.getenv('NASDAQ_API_KEY')

# os.getenv returns None when the variable is unset; fail here with a clear message
# instead of a TypeError later when the key is concatenated into the request URL
if not API_KEY:
    raise RuntimeError('NASDAQ_API_KEY is not set; add it to your .env file or environment.')

In [2]:
# import packages
import json
import math

import pandas as pd
import requests

In [3]:
# define api url
url_sample = 'https://data.nasdaq.com/api/v3/datasets/FSE/AFX_X.json?start_date=2017-01-01&end_date=2017-01-02&api_key=' + API_KEY

# make a request call; requests applies no timeout by default, so set one to avoid hanging
r = requests.get(url_sample, timeout=30)

# stop on an HTTP error (bad key, rate limit, ...) instead of parsing an error body
r.raise_for_status()

# convert json response to dict
json_sample = r.json()

In [4]:
# check out what the data will look like
json_sample

{'dataset': {'id': 10095370,
  'dataset_code': 'AFX_X',
  'database_code': 'FSE',
  'name': 'Carl Zeiss Meditec (AFX_X)',
  'description': 'Stock Prices for Carl Zeiss Meditec (2020-11-02) from the Frankfurt Stock Exchange.<br><br>Trading System: Xetra<br><br>ISIN: DE0005313704',
  'refreshed_at': '2020-12-01T14:48:09.907Z',
  'newest_available_date': '2020-12-01',
  'oldest_available_date': '2000-06-07',
  'column_names': ['Date',
   'Open',
   'High',
   'Low',
   'Close',
   'Change',
   'Traded Volume',
   'Turnover',
   'Last Price of the Day',
   'Daily Traded Units',
   'Daily Turnover'],
  'frequency': 'daily',
  'type': 'Time Series',
  'premium': False,
  'limit': None,
  'transform': None,
  'column_index': None,
  'start_date': '2017-01-01',
  'end_date': '2017-01-02',
  'data': [['2017-01-02',
    34.99,
    35.94,
    34.99,
    35.8,
    None,
    44700.0,
    1590561.0,
    None,
    None,
    None]],
  'collapse': None,
  'order': None,
  'database_id': 6129}}

In [5]:
# define api url for the full 2017 calendar year
# (start_date previously read '02017-01-01', a malformed date with a stray leading zero)
url = 'https://data.nasdaq.com/api/v3/datasets/FSE/AFX_X.json?start_date=2017-01-01&end_date=2017-12-31&api_key=' + API_KEY

# make a request call; requests applies no timeout by default, so set one to avoid hanging
r = requests.get(url, timeout=30)

# stop on an HTTP error (bad key, rate limit, ...) instead of parsing an error body
r.raise_for_status()

# convert json response to dict
json_data = r.json()

In [6]:
# confirm the parsed response is a plain dict
print(type(json_data))

<class 'dict'>


In [7]:
# list the top-level keys of the response
json_data.keys()

dict_keys(['dataset'])

The response is a dictionary with a single top-level key, `dataset`, whose value contains several nested fields, so we can't convert it directly to a dataframe yet.

In [8]:
# try flattening the nested response with json_normalize() and preview the result
df = pd.json_normalize(data=json_data)
df.head()

Unnamed: 0,dataset.id,dataset.dataset_code,dataset.database_code,dataset.name,dataset.description,dataset.refreshed_at,dataset.newest_available_date,dataset.oldest_available_date,dataset.column_names,dataset.frequency,...,dataset.premium,dataset.limit,dataset.transform,dataset.column_index,dataset.start_date,dataset.end_date,dataset.data,dataset.collapse,dataset.order,dataset.database_id
0,10095370,AFX_X,FSE,Carl Zeiss Meditec (AFX_X),Stock Prices for Carl Zeiss Meditec (2020-11-0...,2020-12-01T14:48:09.907Z,2020-12-01,2000-06-07,"[Date, Open, High, Low, Close, Change, Traded ...",daily,...,False,,,,2017-01-01,2017-12-31,"[[2017-12-29, 51.76, 51.94, 51.45, 51.76, None...",,,6129


Okay, `pd.json_normalize()` didn't give a usable table: it flattened the metadata into a single row and left all of the price rows as one nested list in `dataset.data`. However, to perform the analysis I want, I really only need the dataset's column names and the data values. I am going to extract these lists from the dataset and create a new dataframe with them.

In [9]:
# extract the list of data rows as a variable called data
data = json_data['dataset']['data']

In [10]:
# extract the list of column names that label each position in a data row
column_names = json_data['dataset']['column_names']

In [11]:
# build a dataframe from the row lists, labelling the columns with column_names
df = pd.DataFrame(data, columns=column_names)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Change,Traded Volume,Turnover,Last Price of the Day,Daily Traded Units,Daily Turnover
0,2017-12-29,51.76,51.94,51.45,51.76,,34640.0,1792304.0,,,
1,2017-12-28,51.65,51.82,51.43,51.6,,40660.0,2099024.0,,,
2,2017-12-27,51.45,51.89,50.76,51.82,,57452.0,2957018.0,,,
3,2017-12-22,51.05,51.5,50.92,51.32,,71165.0,3641949.0,,,
4,2017-12-21,51.16,51.52,50.9,51.4,,120649.0,6179433.0,,,


In [12]:
# count null values in each column
df.isnull().sum()

Date                       0
Open                       3
High                       0
Low                        0
Close                      0
Change                   254
Traded Volume              0
Turnover                   0
Last Price of the Day    255
Daily Traded Units       255
Daily Turnover           255
dtype: int64

We have several null values we will want to address. Also, given that Last Price of the Day, Daily Traded Units, and Daily Turnover are all null, I will remove these columns. 

I will fill the Open null values using interpolate because it can estimate values based on linear relationships between points. This seems more appropriate than just filling forward or backwards. 

In [13]:
# (number of rows, number of columns)
df.shape

(255, 11)

In [14]:
# summary statistics for the numeric columns
df.describe()

Unnamed: 0,Open,High,Low,Close,Change,Traded Volume,Turnover
count,252.0,255.0,255.0,255.0,1.0,255.0,255.0
mean,43.344206,43.702804,42.924373,43.364157,-0.44,89124.337255,3853589.0
std,4.348585,4.365667,4.267225,4.321755,,60441.130541,2517807.0
min,34.0,34.12,33.62,34.06,-0.44,45.0,1980.0
25%,41.395,41.58,41.045,41.4,-0.44,56282.0,2388636.0
50%,43.45,43.55,42.62,43.28,-0.44,76286.0,3292223.0
75%,45.8575,46.195,45.39,45.85,-0.44,104479.0,4591904.0
max,53.11,53.54,52.48,53.09,-0.44,670349.0,25910540.0


In [15]:
# identify rows where 'Open' is missing, plus the one row where 'Change' is not missing
open_is_missing = df['Open'].isna()
change_is_present = df['Change'].notna()
df_missing = df.loc[open_is_missing | change_is_present]
df_missing

Unnamed: 0,Date,Open,High,Low,Close,Change,Traded Volume,Turnover,Last Price of the Day,Daily Traded Units,Daily Turnover
169,2017-05-01,,42.245,41.655,41.72,-0.44,86348.0,3606589.0,,,
179,2017-04-17,,42.48,41.985,42.2,,88416.0,3734717.0,,,
180,2017-04-14,,42.48,41.985,42.2,,88416.0,3734717.0,,,


In [16]:
# fill missing Open data with linear interpolation
# NOTE(review): rows are ordered newest-first here; method='linear' is documented to treat
# values as equally spaced by row position, so each gap is filled from the adjacent rows
df['Open'] = df['Open'].interpolate(method='linear')

In [17]:
# overwrite 'Change' with the intraday range: the day's high price minus its low price
df['Change'] = df['High'].sub(df['Low'])
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Change,Traded Volume,Turnover,Last Price of the Day,Daily Traded Units,Daily Turnover
0,2017-12-29,51.76,51.94,51.45,51.76,0.49,34640.0,1792304.0,,,
1,2017-12-28,51.65,51.82,51.43,51.6,0.39,40660.0,2099024.0,,,
2,2017-12-27,51.45,51.89,50.76,51.82,1.13,57452.0,2957018.0,,,
3,2017-12-22,51.05,51.5,50.92,51.32,0.58,71165.0,3641949.0,,,
4,2017-12-21,51.16,51.52,50.9,51.4,0.62,120649.0,6179433.0,,,


In [18]:
# re-check null counts after filling 'Open' and recomputing 'Change'
df.isnull().sum()

Date                       0
Open                       0
High                       0
Low                        0
Close                      0
Change                     0
Traded Volume              0
Turnover                   0
Last Price of the Day    255
Daily Traded Units       255
Daily Turnover           255
dtype: int64

In [19]:
# drop the three columns that are entirely null; errors='ignore' keeps this cell
# re-runnable once the columns are already gone (df is reassigned to the result)
df = df.drop(
    columns=['Last Price of the Day', 'Daily Traded Units', 'Daily Turnover'],
    errors='ignore',
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 255 entries, 0 to 254
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           255 non-null    object 
 1   Open           255 non-null    float64
 2   High           255 non-null    float64
 3   Low            255 non-null    float64
 4   Close          255 non-null    float64
 5   Change         255 non-null    float64
 6   Traded Volume  255 non-null    float64
 7   Turnover       255 non-null    float64
dtypes: float64(7), object(1)
memory usage: 16.1+ KB


In [20]:
# lowest and highest opening prices for AFX_X in 2017
open_prices = df['Open']
min_max = open_prices.agg(['min', 'max'])
min_max

min    34.00
max    53.11
Name: Open, dtype: float64

In [21]:
# rank days by intraday range (largest first) and keep only the top row
df.sort_values(by='Change', ascending=False).iloc[:1]

Unnamed: 0,Date,Open,High,Low,Close,Change,Traded Volume,Turnover
161,2017-05-11,43.4,46.06,43.25,45.0,2.81,189125.0,8496322.0


In [22]:
# preview the cleaned dataframe (rows are still ordered newest-first)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Change,Traded Volume,Turnover
0,2017-12-29,51.76,51.94,51.45,51.76,0.49,34640.0,1792304.0
1,2017-12-28,51.65,51.82,51.43,51.6,0.39,40660.0,2099024.0
2,2017-12-27,51.45,51.89,50.76,51.82,1.13,57452.0,2957018.0
3,2017-12-22,51.05,51.5,50.92,51.32,0.58,71165.0,3641949.0
4,2017-12-21,51.16,51.52,50.9,51.4,0.62,120649.0,6179433.0


In [23]:
# order rows oldest-first so day-over-day differences compare each day with the previous one
# (sort_values sorts ascending by default)
df_sorted = df.sort_values(by='Date')
df_sorted

Unnamed: 0,Date,Open,High,Low,Close,Change,Traded Volume,Turnover
254,2017-01-02,34.99,35.94,34.99,35.80,0.95,44700.0,1590561.0
253,2017-01-03,35.90,35.93,35.34,35.48,0.59,70618.0,2515473.0
252,2017-01-04,35.48,35.51,34.75,35.19,0.76,54408.0,1906810.0
251,2017-01-05,35.02,35.20,34.73,35.06,0.47,48412.0,1692326.0
250,2017-01-06,34.91,35.21,34.91,35.04,0.30,27507.0,964046.0
...,...,...,...,...,...,...,...,...
4,2017-12-21,51.16,51.52,50.90,51.40,0.62,120649.0,6179433.0
3,2017-12-22,51.05,51.50,50.92,51.32,0.58,71165.0,3641949.0
2,2017-12-27,51.45,51.89,50.76,51.82,1.13,57452.0,2957018.0
1,2017-12-28,51.65,51.82,51.43,51.60,0.39,40660.0,2099024.0


In [24]:
# new column: each day's close minus the previous row's close (first row is NaN)
df_sorted['Change in Close'] = df_sorted['Close'].diff()

In [25]:
# calculate the percent change in closing price relative to the previous row
df_sorted['Change in Close Percent'] = df_sorted['Close'].pct_change().mul(100)
df_sorted.head()

Unnamed: 0,Date,Open,High,Low,Close,Change,Traded Volume,Turnover,Change in Close,Change in Close Percent
254,2017-01-02,34.99,35.94,34.99,35.8,0.95,44700.0,1590561.0,,
253,2017-01-03,35.9,35.93,35.34,35.48,0.59,70618.0,2515473.0,-0.32,-0.893855
252,2017-01-04,35.48,35.51,34.75,35.19,0.76,54408.0,1906810.0,-0.29,-0.817362
251,2017-01-05,35.02,35.2,34.73,35.06,0.47,48412.0,1692326.0,-0.13,-0.369423
250,2017-01-06,34.91,35.21,34.91,35.04,0.3,27507.0,964046.0,-0.02,-0.057045


In [26]:
# absolute value of the *percent* change in close, so days can be ranked by the size
# of the move regardless of direction
# NOTE: the column name ends with a trailing space; later lookups must match it exactly
df_sorted['absolute Change in Close '] = df_sorted['Change in Close Percent'].abs()

In [27]:
# rank days by absolute percent change in close (largest first) and keep the top row
ranked_by_move = df_sorted.sort_values(by='absolute Change in Close ', ascending=False)
largest_change_between_days = ranked_by_move.iloc[:1]
largest_change_between_days

Unnamed: 0,Date,Open,High,Low,Close,Change,Traded Volume,Turnover,Change in Close,Change in Close Percent,absolute Change in Close
98,2017-08-09,43.5,43.5,41.64,41.81,1.86,355857.0,15003956.0,-2.56,-5.769664,5.769664


In [28]:
# mean of the daily traded volume
traded_volume = df['Traded Volume']
avg_trading_volume = traded_volume.mean()
avg_trading_volume

89124.33725490196

In [29]:
# median of the daily traded volume
traded_volume = df['Traded Volume']
median_trading_volume = traded_volume.median()
median_trading_volume

76286.0

**Answers**
1. The highest opening price was 53.11 and the lowest opening price was 34.00 for AFX_X during this period.
2. The largest change in any one day (based on High and Low price) was on 2017-05-11 with a change of 2.81. 
3. The largest change between any two days (based on Closing Price) was on 2017-08-09, when the close fell by 2.56 (-5.77%) from the previous Closing Price.
4. The average daily trading volume during this year was 89,124.
5. The median trading volume during this year was 76,286.

I was not sure how to do these calculations without using pandas to convert the dictionary into a dataframe. I prompted ChatGPT to explore options of how this could be done. It suggested trying to extract the data into lists and then performing calculations with for loops. I am going to give this a try below.

In [30]:
# pull the row data and the column labels out of the nested 'dataset' entry
dataset = json_data['dataset']
data = dataset['data']
columns = dataset['column_names']

In [31]:
# inspect the first row to see how values line up with the column names
data[0]

['2017-12-29',
 51.76,
 51.94,
 51.45,
 51.76,
 None,
 34640.0,
 1792304.0,
 None,
 None,
 None]

In [32]:
# column labels, in the same order as the values within each row
columns

['Date',
 'Open',
 'High',
 'Low',
 'Close',
 'Change',
 'Traded Volume',
 'Turnover',
 'Last Price of the Day',
 'Daily Traded Units',
 'Daily Turnover']

In [33]:
def _column_values(rows, column_names, name):
    """Return the values of column ``name`` from ``rows``, with None replaced by NaN.

    ``rows`` is a list of row lists and ``column_names`` gives the label of each
    position within a row.
    """
    # resolve the column position once instead of once (or twice) per row
    position = column_names.index(name)
    return [row[position] if row[position] is not None else float('nan') for row in rows]


# extract the values of interest into one list per column
opens = _column_values(data, columns, 'Open')
highs = _column_values(data, columns, 'High')
lows = _column_values(data, columns, 'Low')
closes = _column_values(data, columns, 'Close')
volumes = _column_values(data, columns, 'Traded Volume')

In [34]:
# sanity check: every extracted list should have one entry per data row
print(len(opens), len(highs), len(lows), len(closes), len(volumes))

255 255 255 255 255


In [35]:
# highest and lowest opening price
# missing opens were stored as NaN; every comparison with NaN is False, so max()/min()
# would return either NaN or a real price depending on where the NaN sits in the list.
# Drop the NaNs before comparing.
valid_opens = [price for price in opens if not math.isnan(price)]
highest_opening = max(valid_opens)
lowest_opening = min(valid_opens)
print(f'Highest opening price: {highest_opening} and lowest opening price: {lowest_opening}.')

Highest opening price: 53.11 and lowest opening price: 34.0.


In [36]:
# largest intraday range: the biggest (high - low) across all days
largest_daily_change = max(day_high - day_low for day_high, day_low in zip(highs, lows))
largest_daily_change

2.8100000000000023

In [37]:
# largest absolute difference between the closes of two consecutive rows
largest_closing_change = max(
    abs(current - previous) for previous, current in zip(closes, closes[1:])
)
largest_closing_change

2.559999999999995

In [38]:
# average trading volume: total traded volume divided by the number of days
total_volume = sum(volumes)
average_volume = total_volume / len(volumes)
average_volume

89124.33725490196

In [39]:
def median(lst):
    """Return the median of the values in ``lst``.

    The input is not modified; a sorted copy is used. For an odd number of
    values the middle value is returned as-is; for an even number the mean of
    the two middle values is returned as a float.

    Raises:
        ValueError: if ``lst`` is empty (the median is undefined).
    """
    sorted_lst = sorted(lst)
    lst_len = len(sorted_lst)
    if lst_len == 0:
        raise ValueError('median() requires at least one value')

    # lower-middle position; for odd lengths this is the exact middle
    index = (lst_len - 1) // 2

    if lst_len % 2:
        return sorted_lst[index]
    return (sorted_lst[index] + sorted_lst[index + 1]) / 2.0

# median trading volume
median(volumes)

76286.0

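I got the same answers as I did using pandas! (The largest change between two days is reported here as an absolute price change of 2.56, which matches the 2.56 drop in Closing Price on 2017-08-09 found with pandas.)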