## All of the code for this project will reside within this notebook

## Datasets

For this project, we will be using the [coinbase](https://intranet.hbtn.io/rltoken/qxNSNQUc-7AyTu4exzUxIg "coinbase") and [bitstamp](https://intranet.hbtn.io/rltoken/EAG7kiX_FfApWPkb2dsd6Q "bitstamp") datasets.

In [1]:
# Imports
import pandas as pd
import numpy as np

In [2]:
# Task 0. From Numpy
def from_numpy(array):
    """
    Creates a pd.DataFrame from a np.array

    Columns are labelled alphabetically in capitals: 'A' through 'Z' for the
    first 26 columns, then spreadsheet-style 'AA', 'AB', ... so that wider
    arrays still receive purely alphabetic labels.

    Args:
        array: 2-D np.array from which a DataFrame will be created; the
            number of columns is read from array.shape[1]

    Returns:
        newly created pd.DataFrame
    """
    def _column_label(position):
        """Return the spreadsheet-style label for a zero-based position."""
        letters = ''
        # Bijective base-26: shift to one-based so 'Z' rolls over to 'AA'
        # instead of running past the alphabet.
        position += 1
        while position > 0:
            position, remainder = divmod(position - 1, 26)
            letters = chr(ord('A') + remainder) + letters
        return letters

    # The number of labels has to match the number of columns exactly.
    num_columns = array.shape[1]
    column_names = [_column_label(x) for x in range(num_columns)]

    return pd.DataFrame(array, columns=column_names)


In [3]:
# 0-main
# Seed NumPy's global generator so both sample arrays are reproducible.
np.random.seed(0)
A = np.random.randn(5, 8)
B = np.random.randn(9, 3)
# One frame per line of output, A first and then B.
print(from_numpy(A), from_numpy(B), sep='\n')

          A         B         C         D         E         F         G  \
0  1.764052  0.400157  0.978738  2.240893  1.867558 -0.977278  0.950088   
1 -0.103219  0.410599  0.144044  1.454274  0.761038  0.121675  0.443863   
2  1.494079 -0.205158  0.313068 -0.854096 -2.552990  0.653619  0.864436   
3  2.269755 -1.454366  0.045759 -0.187184  1.532779  1.469359  0.154947   
4 -0.887786 -1.980796 -0.347912  0.156349  1.230291  1.202380 -0.387327   

          H  
0 -0.151357  
1  0.333674  
2 -0.742165  
3  0.378163  
4 -0.302303  
          A         B         C
0 -1.048553 -1.420018 -1.706270
1  1.950775 -0.509652 -0.438074
2 -1.252795  0.777490 -1.613898
3 -0.212740 -0.895467  0.386902
4 -0.510805 -1.180632 -0.028182
5  0.428332  0.066517  0.302472
6 -0.634322 -0.362741 -0.672460
7 -0.359553 -0.813146 -1.726283
8  0.177426 -0.401781 -1.630198


In [4]:
# Task 1. From Dictionary
# Each key becomes a row label; each two-item list supplies that row's values.
columns = ['First', 'Second']
data = {
    'A': [0.0, 'one'],
    'B': [0.5, 'two'],
    'C': [1.0, 'three'],
    'D': [1.5, 'four'],
}

task1_df = pd.DataFrame(
    list(data.values()),
    index=list(data.keys()),
    columns=columns,
)

In [5]:
# 1-main
# Show the Task 1 frame: rows labelled A-D, columns 'First' and 'Second'.
print(task1_df)

   First Second
A    0.0    one
B    0.5    two
C    1.0  three
D    1.5   four


In [14]:
# Task 2. From File
def from_file(filename, delimiter):
    """
    Loads data from a file as a dataframe

    The loader is chosen from the file name's suffix, compared without
    regard to case. Only the end of the name is inspected, so dots elsewhere
    in the path (for example './data/x.csv' or 'v1.0/x.csv') do not affect
    the decision.

    Args:
        filename: file to load; must end in '.csv' or '.json'
        delimiter: delimiter for the file (used for .csv files only; it is
            ignored when loading .json)

    Returns:
        the loaded pd.DataFrame, or an explanatory message string when the
        file name has neither supported suffix
    """
    # str() lets path-like objects through; lower() makes '.CSV' match too.
    suffix_source = str(filename).lower()

    if suffix_source.endswith('.csv'):
        return pd.read_csv(filename, delimiter=delimiter)
    if suffix_source.endswith('.json'):
        return pd.read_json(filename)

    # Kept as a returned message (not an exception) so existing callers that
    # rely on this fallback keep working.
    return "This only works for .csv file and .json files."

In [32]:
# 2-main
# Load each exchange's CSV with from_file and print its last five rows.
# from_file also accepts .json paths; the delimiter is ignored for those.
df1 = from_file(
    'data/coinbaseUSD_1-min_data_2014-12-01_to_2019-01-09.csv',
    ',',
)
print(df1.tail())

df2 = from_file(
    'data/bitstampUSD_1-min_data_2012-01-01_to_2020-04-22.csv',
    ',',
)
print(df2.tail())

          Timestamp     Open     High      Low    Close  Volume_(BTC)  \
2099755  1546898520  4006.01  4006.57  4006.00  4006.01      3.382954   
2099756  1546898580  4006.01  4006.57  4006.00  4006.01      0.902164   
2099757  1546898640  4006.01  4006.01  4006.00  4006.01      1.192123   
2099758  1546898700  4006.01  4006.01  4005.50  4005.50      2.699700   
2099759  1546898760  4005.51  4006.01  4005.51  4005.99      1.752778   

         Volume_(Currency)  Weighted_Price  
2099755       13553.433078     4006.390309  
2099756        3614.083168     4006.017232  
2099757        4775.647308     4006.003635  
2099758       10814.241898     4005.719991  
2099759        7021.183546     4005.745614  
          Timestamp     Open     High      Low    Close  Volume_(BTC)  \
4363452  1587513360  6847.97  6856.35  6847.97  6856.35      0.125174   
4363453  1587513420  6850.23  6856.13  6850.23  6850.89      1.224777   
4363454  1587513480  6846.50  6857.45  6846.02  6857.45      7.089168   

In [34]:
# Task 3. Rename
"""
Rename the `Timestamp` column to `Datetime`, convert its values from
epoch seconds to datetime values, and keep only the Datetime and Close
columns for display.
"""
task3_df = (
    from_file('data/coinbaseUSD_1-min_data_2014-12-01_to_2019-01-09.csv', ',')
    .rename(columns={'Timestamp': 'Datetime'})
    # unit='s' tells pandas the stored integers are seconds since the epoch.
    .assign(Datetime=lambda frame: pd.to_datetime(frame['Datetime'], unit='s'))
    [['Datetime', 'Close']]
)

print(task3_df.tail())

                   Datetime    Close
2099755 2019-01-07 22:02:00  4006.01
2099756 2019-01-07 22:03:00  4006.01
2099757 2019-01-07 22:04:00  4006.01
2099758 2019-01-07 22:05:00  4005.50
2099759 2019-01-07 22:06:00  4005.99
