## Setup
---

In [None]:

%logstop
%logstart -t -r -q ipython_command_log.py global

#- IRONHACKS RESEARCH TRACKING CODE
#----------------------------------
# The following code is used to help our research team understand how you use
# our notebook environment. We do not collect any personal information with
# the following code, it is used to measure when and how often you work on
# your submission files.

import os
from datetime import datetime
import IPython.core.history as history

# Append one "session_id,command_id,timestamp" row to the session log each
# time this cell runs.
ha = history.HistoryAccessor()
ha_tail = ha.get_tail(1)  # only the most recent history entry is requested
ha_cmd = next(ha_tail)
session_id = str(ha_cmd[0])
command_id = str(ha_cmd[1])
timestamp = datetime.utcnow().isoformat()
history_line = ','.join([session_id, command_id, timestamp]) + '\n'
# A context manager guarantees the handle is closed even if the write fails.
with open(os.environ['HOME']+'/ipython_session_log.csv', 'a') as logfile:
    logfile.write(history_line)

In [None]:


#- INSTALL ADDITIONAL LIBRARIES IF REQUIRED
#------------------------------------------
# This is normally not required. The hub environment comes preinstalled with
# many packages that you can already use without setup. In case there is some
# other library you would like to use that isn't on the list, you can run this
# command once to install it. If it is already installed, this command has no effect.


### Imports

In [None]:
#- IMPORT THE LIBRARIES YOU WILL USE
#------------------------------------------
# You only need to import packages one time per notebook session. To keep your
# notebook clean and organized you can handle all imports at the top of your file.
# The following are included for example purposes; feel free to modify or delete
# anything in this section.

import csv
import pandas as pd
import numpy as np
from google.cloud import bigquery
from google.oauth2 import service_account
from google.cloud.bigquery import magics
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers

### Get data from BIGQUERY

##### CONFIGURE THE BIGQUERY SETTINGS

In [None]:
# Google Cloud project the BigQuery client is created for, and the location
# of the service-account key file used for authentication.
BIGQUERY_PROJECT = 'ironhacks-covid19-data'
BIGQUERY_KEYPATH = '/home/jovyan/service-account.json'

# Publish the key location through the environment before building the client.
os.environ.update({'GOOGLE_APPLICATION_CREDENTIALS': BIGQUERY_KEYPATH})
bigquery_client = bigquery.Client(project=BIGQUERY_PROJECT)

##### SQL code

In [None]:
# Weekly visit data per POI: the JOIN keeps only weekly_patterns rows whose
# poi_id also appears in prediction_list_poi (presumably the POIs that must be
# predicted — confirm), and the result is sorted by POI and then by week.
query = """
SELECT w.poi_id, w.week_number, w.visits_concentration, w.raw_visit_counts
FROM `ironhacks_covid19_competition`.`weekly_patterns` w
JOIN `ironhacks_covid19_competition`.`prediction_list_poi` pred ON pred.poi_id = w.poi_id
ORDER BY poi_id, week_number;
"""

In [None]:

# QUERY THE DATA ONCE
# Submit the SQL to BigQuery and load the whole result set into a pandas
# DataFrame. Later cells work on a copy of `in_test_data`, so the query does
# not have to be re-run after preprocessing experiments.
query_job = bigquery_client.query(query)
in_test_data = query_job.to_dataframe()

##### Check Data

In [None]:
in_test_data.head()

Unnamed: 0,poi_id,week_number,visits_concentration,raw_visit_counts
0,00243ce2-6ecc-4c63-8d58-1138476289e4,11,769.23,13
1,00243ce2-6ecc-4c63-8d58-1138476289e4,12,1570.25,11
2,00243ce2-6ecc-4c63-8d58-1138476289e4,13,2800.0,5
3,00243ce2-6ecc-4c63-8d58-1138476289e4,14,2000.0,5
4,00243ce2-6ecc-4c63-8d58-1138476289e4,15,2000.0,5


In [None]:
in_test_data.dtypes

poi_id                   object
week_number               int64
visits_concentration    float64
raw_visit_counts          int64
dtype: object

#### Data Checkpoint

In [None]:
# Work on an independent copy so the queried frame `in_test_data` stays
# untouched by the in-place preprocessing that follows.
checkpoint1 = in_test_data.copy()

---
## Preprocess
---

In [None]:
# Replace every POI identifier by the integer code of its category so the
# column becomes numeric.
checkpoint1['poi_id'] = checkpoint1['poi_id'].astype('category').cat.codes

In [None]:
checkpoint1.head()

Unnamed: 0,poi_id,week_number,visits_concentration
0,0,11,769.23
1,0,12,1570.25
2,0,13,2800.0
3,0,14,2000.0
4,0,15,2000.0


In [None]:
# Split the label column off the feature frame. `pop` removes the column from
# `checkpoint1` in place, so the guard keeps this cell re-runnable: on a second
# run the column is already gone and the previously extracted `target` is reused.
if 'raw_visit_counts' in checkpoint1.columns:
    target = checkpoint1.pop('raw_visit_counts')
# (features, label) pairs, one element per row of the frame.
dataset = tf.data.Dataset.from_tensor_slices((checkpoint1.values, target.values))

In [None]:
# Persist features and labels as plain arrays; NumPy appends the ".npz"
# extension to the given file name.
np.savez('data_train', inputs=checkpoint1.values, targets=target.values)

### Import NPZ data

In [None]:
# Open the archive of training arrays; individual arrays are read by key
# ('inputs', 'targets') in the following cells.
npz = np.load('data_train.npz')

In [None]:
# Feature matrix as floats. The builtin `float` is what the removed `np.float`
# alias pointed to, so the resulting dtype is unchanged.
train_inputs = npz['inputs'].astype(float)

In [None]:
# Label vector as integers. The builtin `int` is what the removed `np.int`
# alias pointed to, so the resulting dtype is unchanged.
train_targets = npz['targets'].astype(int)

data checkpoint

In [None]:
# Network dimensions. `input_size` documents the number of feature columns but
# is not referenced by the model below; the input width is inferred by Keras
# from the first batch it sees.
input_size = 3
output_size = 1
hidden_layer_size = 500

def get_compiled_model():
    """Build and compile the feed-forward regression network.

    The network has two ReLU hidden layers of `hidden_layer_size` units each,
    followed by a linear output layer of `output_size` units. It is compiled
    with the Adam optimizer and a mean-squared-error loss.

    Returns:
        tf.keras.Sequential: a compiled, untrained model.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
        tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
        tf.keras.layers.Dense(output_size)
    ])

    # The target is a numeric count fitted with MSE, so track a regression
    # metric; classification 'accuracy' carries no information here.
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
    return model

#### set the batch size

In [None]:
# number of samples per gradient update (passed to model.fit below)
batch_size = 43

# set a maximum number of training epochs
max_epochs = 100

# Stops training once the monitored quantity has not improved for 2 epochs in
# a row. NOTE(review): with no `monitor` argument Keras watches val_loss, so
# this callback needs validation data in fit() to have any effect — confirm
# before enabling it.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

In [None]:
# Build a fresh model and train it on the complete training set.
model = get_compiled_model()
model.fit(train_inputs, # train inputs
          train_targets, # train targets
          batch_size=batch_size, # batch size
          epochs=max_epochs, # upper bound on epochs (early stopping could end training sooner)
          # Early stopping and validation are currently disabled (the next two
          # lines are commented out), so training always runs for max_epochs
          # and no validation loss is reported.
          #callbacks=[early_stopping], # early stopping
          #validation_data=(validation_inputs, validation_targets), # validation data
          verbose = 2 # one summary line per epoch in the training log
          )  

Epoch 1/100
1204/1204 - 4s - loss: 33433.4453 - accuracy: 0.0123
Epoch 2/100
1204/1204 - 5s - loss: 28439.1406 - accuracy: 0.0125
Epoch 3/100
1204/1204 - 4s - loss: 28394.6758 - accuracy: 0.0125
Epoch 4/100
1204/1204 - 4s - loss: 27895.1270 - accuracy: 0.0120
Epoch 5/100
1204/1204 - 5s - loss: 27710.4453 - accuracy: 0.0129
Epoch 6/100
1204/1204 - 4s - loss: 27136.5996 - accuracy: 0.0122
Epoch 7/100
1204/1204 - 4s - loss: 26841.5391 - accuracy: 0.0138
Epoch 8/100
1204/1204 - 4s - loss: 27066.3223 - accuracy: 0.0141
Epoch 9/100
1204/1204 - 5s - loss: 26568.0449 - accuracy: 0.0187
Epoch 10/100
1204/1204 - 4s - loss: 26670.1074 - accuracy: 0.0232
Epoch 11/100
1204/1204 - 4s - loss: 26426.6074 - accuracy: 0.0223
Epoch 12/100
1204/1204 - 4s - loss: 26542.1348 - accuracy: 0.0229
Epoch 13/100
1204/1204 - 5s - loss: 26193.0293 - accuracy: 0.0256
Epoch 14/100
1204/1204 - 4s - loss: 26023.9160 - accuracy: 0.0257
Epoch 15/100
1204/1204 - 4s - loss: 26282.7734 - accuracy: 0.0261
Epoch 16/100
1204/1

<tensorflow.python.keras.callbacks.History at 0x7f8a1442cb50>

In [None]:
# Consecutive integers 0..1803; used as the poi_id column of the prediction
# frame (presumably one code per POI — confirm the count of 1804).
temp = np.arange(0, 1804)

In [None]:
# Constant vector of 44s; used as the week_number column of the prediction
# frame (presumably the week to forecast — confirm).
temp1 = np.full(1804, 44)

In [None]:
# Constant vector of 1000s; used as the visits_concentration column of the
# prediction frame (placeholder value — confirm it is the intended input).
temp2 = np.full(1804, 1000)

In [None]:
# Stack the three feature vectors as rows: one row per feature, one column
# per sample.
predict_data = np.stack([temp, temp1, temp2], axis=0)

In [None]:
# Arrange the features as columns (one row per sample). Building the array
# directly from the source vectors, instead of transposing `predict_data` in
# place, keeps this cell idempotent: re-running it no longer flips the shape back.
predict_data = np.stack((temp, temp1, temp2), axis=1)

In [None]:
# Labelled float view of the prediction inputs. The builtin `float` replaces
# the removed `np.float` alias and yields the same dtype.
predict_df = pd.DataFrame(data=predict_data, columns=['poi_id', 'week_number','visits_concentration']).astype(float)

In [None]:
print(predict_df)

      poi_id  week_number  visits_concentration
0        0.0         44.0                1000.0
1        1.0         44.0                1000.0
2        2.0         44.0                1000.0
3        3.0         44.0                1000.0
4        4.0         44.0                1000.0
...      ...          ...                   ...
1799  1799.0         44.0                1000.0
1800  1800.0         44.0                1000.0
1801  1801.0         44.0                1000.0
1802  1802.0         44.0                1000.0
1803  1803.0         44.0                1000.0

[1804 rows x 3 columns]


In [None]:
# Run the trained network on the prediction inputs. Note that the raw array
# `predict_data` is passed, not the labelled frame `predict_df`; the return
# value is a NumPy array, not a DataFrame.
result = model.predict(predict_data)

In [None]:
# `model.predict` returns a NumPy array, which has no `to_csv`; wrap the
# predictions in a DataFrame before writing them out.
# NOTE(review): `poi_id` holds the integer category codes produced during
# preprocessing, not the original identifiers, and the column names are a
# guess — map the codes back / rename if the submission format requires it.
submission = pd.DataFrame({
    'poi_id': predict_df['poi_id'].astype(int).to_numpy(),
    'raw_visit_counts': result.ravel(),
})
# Relative path: the original leading backslash would have ended up as part of
# the file name on a POSIX system.
submission.to_csv('submission_prediction_output.csv', index=False)

AttributeError: 'numpy.ndarray' object has no attribute 'to_csv'