<a href="https://colab.research.google.com/github/alanntl/SENG3011/blob/alanntl/Event_dataset_datamodel_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Generic data model example

In [None]:
import json
from datetime import datetime
from IPython.display import clear_output 
import numpy as np
import pandas as pd
import pytz
from time import gmtime, strftime

# Install/upgrade yahoo_fin (imported by the cells below) through a shell escape,
# then wipe the pip log from this cell's output.
!pip install yahoo_fin --upgrade --no-cache-dir
clear_output()

In [None]:
# Run-level metadata reused by every data-model cell below.
# NOTE(review): datetime.now() yields a naive local time, so the value is not
# tied to either zone label here -- confirm which zone the kernel runs in.
tzs = 'GMT+11'  # label written to the top-level "timezone" field
tza = 'GMT-5'   # label written to each event's "timezone" field
now = datetime.now()
date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
print(date_time, tzs, tza, sep="\n")


2023-01-24 07:18:50.342216
GMT+11
GMT-5


# Tool for visualizing json format
https://jsoncrack.com/editor

# 1. Yahoo Finance

## a. yahoo finance quote data

In [None]:
# Fetch the raw quote table for AAPL from Yahoo Finance (network call).
from yahoo_fin import stock_info as si

quote_table = si.get_quote_table("aapl")
# Bare last expression so the notebook renders the fetched table.
quote_table


{'1y Target Est': 171.48,
 '52 Week Range': '124.17 - 179.61',
 'Ask': '0.00 x 800',
 'Avg. Volume': 81832000.0,
 'Beta (5Y Monthly)': 1.27,
 'Bid': '0.00 x 900',
 "Day's Range": '137.90 - 143.31',
 'EPS (TTM)': 6.11,
 'Earnings Date': 'Feb 02, 2023',
 'Ex-Dividend Date': 'Nov 04, 2022',
 'Forward Dividend & Yield': '0.92 (0.67%)',
 'Market Cap': '2.235T',
 'Open': 138.12,
 'PE Ratio (TTM)': 23.09,
 'Previous Close': 137.87,
 'Quote Price': 141.11000061035156,
 'Volume': 81760313.0}

In [None]:
# Our data model: a dataset-level envelope holding a single "stock quote" event.
jsonfile_quote = {
    "data_source": "yahoo_finance",
    "dataset_type_id": "yf002",
    "timestamp": date_time,
    "timezone": tzs,
    "events": [],
}
# The raw quote table is carried unchanged as the event's attribute payload.
jsonfile_quote["events"].append({
    "timestamp": date_time,
    "timezone": tza,
    "event_type": 'stock quote',
    "attribute": quote_table,
})

# Persist the model as pretty-printed JSON.
with open("jsonfile_quote.json", "w") as write_file:
    json.dump(jsonfile_quote, write_file, indent=4)

jsonfile_quote

{'data_source': 'yahoo_finance',
 'dataset_type_id': 'yf002',
 'timestamp': '2023-01-24 07:18:50.342216',
 'timezone': 'GMT+11',
 'events': [{'timestamp': '2023-01-24 07:18:50.342216',
   'timezone': 'GMT-5',
   'event_type': 'stock quote',
   'attribute': {'1y Target Est': 171.48,
    '52 Week Range': '124.17 - 179.61',
    'Ask': '0.00 x 800',
    'Avg. Volume': 81832000.0,
    'Beta (5Y Monthly)': 1.27,
    'Bid': '0.00 x 900',
    "Day's Range": '137.90 - 143.31',
    'EPS (TTM)': 6.11,
    'Earnings Date': 'Feb 02, 2023',
    'Ex-Dividend Date': 'Nov 04, 2022',
    'Forward Dividend & Yield': '0.92 (0.67%)',
    'Market Cap': '2.235T',
    'Open': 138.12,
    'PE Ratio (TTM)': 23.09,
    'Previous Close': 137.87,
    'Quote Price': 141.11000061035156,
    'Volume': 81760313.0}}]}

## b. yahoo finance OHLC data

In [None]:
# Fetch the raw OHLC history for AAPL from Yahoo Finance (network call).
import yahoo_fin.stock_info as si
ohlc = si.get_data("aapl")
# Expose the index as a plain 'YYYY-MM-DD' string column so it survives the
# JSON conversion below. The previous format string
# ('%Y-%m-%d %00:00:00.00 %Z') was not a valid strptime pattern; an explicit
# strftime states the intended output instead.
ohlc['date'] = pd.to_datetime(ohlc.index).strftime('%Y-%m-%d')
ohlc.head()

Unnamed: 0,open,high,low,close,adjclose,volume,ticker,date
1980-12-12,0.128348,0.128906,0.128348,0.128348,0.099874,469033600,AAPL,1980-12-12
1980-12-15,0.12221,0.12221,0.121652,0.121652,0.094663,175884800,AAPL,1980-12-15
1980-12-16,0.113281,0.113281,0.112723,0.112723,0.087715,105728000,AAPL,1980-12-16
1980-12-17,0.115513,0.116071,0.115513,0.115513,0.089886,86441600,AAPL,1980-12-17
1980-12-18,0.118862,0.11942,0.118862,0.118862,0.092492,73449600,AAPL,1980-12-18


In [None]:
# Transform the first three OHLC rows into a list of JSON-compatible dicts.
import ast  # no longer used in this cell; kept in case later cells rely on the name
# to_json() emits JSON text (null/true/false, escaped slashes), which is not
# Python literal syntax: ast.literal_eval fails on it as soon as a NaN cell is
# serialised as null. json.loads is the matching parser.
# Slicing before serialising avoids converting the whole history for 3 rows.
ohlc_json = json.loads(ohlc.head(3).to_json(orient="records"))
ohlc_json

[{'open': 0.1283479929,
  'high': 0.1289059967,
  'low': 0.1283479929,
  'close': 0.1283479929,
  'adjclose': 0.0998738259,
  'volume': 469033600,
  'ticker': 'AAPL',
  'date': '1980-12-12'},
 {'open': 0.1222100034,
  'high': 0.1222100034,
  'low': 0.1216519997,
  'close': 0.1216519997,
  'adjclose': 0.0946633369,
  'volume': 175884800,
  'ticker': 'AAPL',
  'date': '1980-12-15'},
 {'open': 0.1132809967,
  'high': 0.1132809967,
  'low': 0.1127230003,
  'close': 0.1127230003,
  'adjclose': 0.0877152607,
  'volume': 105728000,
  'ticker': 'AAPL',
  'date': '1980-12-16'}]

In [None]:
# Our data model: a dataset-level envelope with one event per OHLC record.
jsonfile_ohlc = {
    "data_source": "yahoo_finance",
    "dataset_type_id": "yf001",
    "timestamp": date_time,
    "timezone": tzs,
    "events": [],
}
for ohlc_record in ohlc_json:
    # The 'date' value is a date-only string (e.g. '1980-12-12'). Forcing the
    # format '%Y-%m-%d %H:%M:%S.%f' onto it raises under pandas' strict format
    # matching, so let pandas parse the ISO date and format it afterwards.
    datetime_object = pd.to_datetime(ohlc_record['date'])
    jsonfile_ohlc['events'].append({
        "timestamp": datetime_object.strftime('%Y-%m-%d %H:%M:%S.%f'),
        "timezone": tza,
        "event_type": 'stock ohlc timeseries',
        "attribute": ohlc_record,
    })

# Persist the model as pretty-printed JSON.
with open("jsonfile_ohlc.json", "w") as write_file:
    json.dump(jsonfile_ohlc, write_file, indent=4)

jsonfile_ohlc

{'data_source': 'yahoo_finance',
 'dataset_type_id': 'yf001',
 'timestamp': '2023-01-24 07:18:50.342216',
 'timezone': 'GMT+11',
 'events': [{'timestamp': '1980-12-12 00:00:00.000000',
   'timezone': 'GMT-5',
   'event_type': 'stock ohlc timeseries',
   'attribute': {'open': 0.1283479929,
    'high': 0.1289059967,
    'low': 0.1283479929,
    'close': 0.1283479929,
    'adjclose': 0.0998738259,
    'volume': 469033600,
    'ticker': 'AAPL',
    'date': '1980-12-12'}},
  {'timestamp': '1980-12-15 00:00:00.000000',
   'timezone': 'GMT-5',
   'event_type': 'stock ohlc timeseries',
   'attribute': {'open': 0.1222100034,
    'high': 0.1222100034,
    'low': 0.1216519997,
    'close': 0.1216519997,
    'adjclose': 0.0946633369,
    'volume': 175884800,
    'ticker': 'AAPL',
    'date': '1980-12-15'}},
  {'timestamp': '1980-12-16 00:00:00.000000',
   'timezone': 'GMT-5',
   'event_type': 'stock ohlc timeseries',
   'attribute': {'open': 0.1132809967,
    'high': 0.1132809967,
    'low': 0.112

## c. Example Yahoo Finance news data

In [None]:
# Fetch the raw RSS news entries for AAPL from Yahoo Finance (network call).
import yahoo_fin.news as news

news_raw = news.get_yf_rss("aapl")
# Preview only the first entry to keep the output short.
news_raw[0]


{'summary': 'John Rotonti, head of investor training and development at The Motley Fool, talks with Jurrien Timmer, director of global macro at Fidelity Investments, about what history can teach about the current market cycle and sectors that may hold opportunities for investors.  Motley Fool Stock Advisor is open to new members for just $99 a year.',
 'summary_detail': {'type': 'text/html',
  'language': None,
  'base': 'https://feeds.finance.yahoo.com/rss/2.0/headline?s=aapl&region=US&lang=en-US',
  'value': 'John Rotonti, head of investor training and development at The Motley Fool, talks with Jurrien Timmer, director of global macro at Fidelity Investments, about what history can teach about the current market cycle and sectors that may hold opportunities for investors.  Motley Fool Stock Advisor is open to new members for just $99 a year.'},
 'id': '15179e1d-766c-3717-9813-aac18def64ce',
 'guidislink': False,
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'http

In [None]:
# Our data model: a dataset-level envelope with one event per news item.
jsonfile_news1 = {
    "data_source": "yahoo_finance",
    "dataset_type_id": "yf003",
    "timestamp": date_time,
    "timezone": tzs,
    "events": [],
}

print("======================news 1======================")

# Keep the demo small: only the first two items are used (the original loop
# stopped after index 1).
for news_item in news_raw[:2]:
    # Read the publication time of the CURRENT item. The previous code always
    # read news_raw[0], so every event carried the first article's timestamp.
    # The first 5 characters are skipped before parsing (presumably a weekday
    # prefix such as 'Tue, ' -- confirm against the feed).
    datetime_str = news_item['published'][5:]
    datetime_object = datetime.strptime(datetime_str, '%d %b %Y %H:%M:%S +0000')
    # NOTE(review): the parsed value has a literal +0000 offset but is labelled
    # with tza ('GMT-5') below -- confirm the intended timezone.
    jsonfile_news1["events"].append({
        "timestamp": datetime_object.strftime('%Y-%m-%d %H:%M:%S.%f'),
        "timezone": tza,
        "event_type": 'stock news',
        "attribute": news_item,
    })

# Persist the model as pretty-printed JSON.
with open("jsonfile_news.json", "w") as write_file:
    json.dump(jsonfile_news1, write_file, indent=4)

jsonfile_news1




{'data_source': 'yahoo_finance',
 'dataset_type_id': 'yf003',
 'timestamp': '2023-01-24 07:18:50.342216',
 'timezone': 'GMT+11',
 'events': [{'timestamp': '2023-01-24 02:21:00.000000',
   'timezone': 'GMT-5',
   'event_type': 'stock news',
   'attribute': {'summary': 'John Rotonti, head of investor training and development at The Motley Fool, talks with Jurrien Timmer, director of global macro at Fidelity Investments, about what history can teach about the current market cycle and sectors that may hold opportunities for investors.  Motley Fool Stock Advisor is open to new members for just $99 a year.',
    'summary_detail': {'type': 'text/html',
     'language': None,
     'base': 'https://feeds.finance.yahoo.com/rss/2.0/headline?s=aapl&region=US&lang=en-US',
     'value': 'John Rotonti, head of investor training and development at The Motley Fool, talks with Jurrien Timmer, director of global macro at Fidelity Investments, about what history can teach about the current market cycle an