### Data Scientist
1. Python
2. Statistics


### Data Science Project Utilities

In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

print(tf.__version__)

2.0.0


In [4]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Working with files
import shutil   # high-level file operations (copy, move, remove trees)
import os   # miscellaneous operating system interfaces
import pathlib   # object-oriented filesystem paths


In [42]:
# TensorFlow modeling
from tensorflow.keras import models
from tensorflow.keras import layers  # Choose built-in layers
from tensorflow.keras import activations  # Choose activation functions
from tensorflow.keras import initializers  # Initializing the layer parameters
from tensorflow.keras import regularizers   # Apply regularization to the layer parameters
# (regularization helps prevent overfitting)
from tensorflow.keras import optimizers
from tensorflow.keras import losses # loss functions
from tensorflow.keras import metrics  # Model performance
from tensorflow.keras import applications  # pre-trained models

In [43]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

### Tensorflow 2.0

TensorFlow 2.0 focuses on simplicity and ease of use, with updates like eager execution, intuitive higher-level APIs, and flexible model building on any platform.

TensorFlow 2.0 is built on the following key ideas:

* Let users run their computation eagerly, like they would in Numpy. This makes TensorFlow 2.0 programming intuitive and Pythonic.
* Preserve the considerable advantages of compiled graphs (for performance, distribution, and deployment). This makes TensorFlow fast, scalable, and production-ready.
* Leverage Keras as its high-level deep learning API, making TensorFlow approachable and highly productive.
* Extend Keras into a spectrum of workflows ranging from the very high-level (easier to use, less flexible) to the very low-level (requires more expertise, but provides great flexibility).

![alt text](https://github.com/Amin-Tgz/awesome-tensorflow-2/raw/master/imgs/TF.png)

In a graph we have nodes and edges. Tensors flow along the edges into the nodes, which are operations. Tensors are quite similar to the N-d arrays in NumPy, and many of the keywords are very similar too:

![alt text](https://miro.medium.com/max/4044/1*c8vKRVnog07DK6D5WJ3f3Q.png)

## Tensorflow basics


In [45]:
x = tf.constant([[5, 2], [1, 3]])
print(x)

tf.Tensor(
[[5 2]
 [1 3]], shape=(2, 2), dtype=int32)


In [47]:
y = x.numpy()
y

array([[5, 2],
       [1, 3]])

In [50]:
print(type(x), type(y), sep='\n')

<class 'tensorflow.python.framework.ops.EagerTensor'>
<class 'numpy.ndarray'>


In [51]:
# common way to create constant tensors
print(tf.ones(shape=(2, 1)))
print(tf.zeros(shape=(2, 1)))


tf.Tensor(
[[1.]
 [1.]], shape=(2, 1), dtype=float32)
tf.Tensor(
[[0.]
 [0.]], shape=(2, 1), dtype=float32)


In [52]:
# random constant tensors
tf.random.normal(shape=(2, 2), mean=0, stddev=1.)

<tf.Tensor: id=1001, shape=(2, 2), dtype=float32, numpy=
array([[ 1.5521363 , -0.18979156],
       [ 0.5543372 ,  0.44434628]], dtype=float32)>

In [53]:
# integer tensor with values drawn from a random uniform distribution

tf.random.uniform(shape=(2, 2), minval=0, maxval=10, dtype='int32')


<tf.Tensor: id=1005, shape=(2, 2), dtype=int32, numpy=
array([[1, 0],
       [2, 0]])>

In [55]:
# Variables are special tensors used to store mutable state
initial_value = tf.random.normal(shape=(2, 2))
a = tf.Variable(initial_value)
print(a)

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[-2.1456008 ,  0.62954944],
       [-1.2231469 , -1.7980956 ]], dtype=float32)>


In [58]:
new_value = tf.random.normal(shape=(2, 2))
a.assign(new_value)
for i in range(2):
    for j in range(2):
        assert a[i, j] == new_value[i, j] 
        

In [59]:
added_value = tf.random.normal(shape=(2, 2))
a.assign_add(added_value)
for i in range(2):
    for j in range(2):
        assert a[i, j] == new_value[i, j] + added_value[i, j]
        

In [60]:
a = tf.random.normal(shape=(2, 2))
b = tf.random.normal(shape=(2, 2))

c = a + b
d = tf.square(c)
e = tf.exp(d)
print(d)
print(e)


tf.Tensor(
[[ 2.271442   17.40568   ]
 [ 0.18914679  0.07354113]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[9.6933680e+00 3.6240204e+07]
 [1.2082183e+00 1.0763128e+00]], shape=(2, 2), dtype=float32)


### Linear Regression


In [62]:
# build linear model

input_dim = 2
output_dim = 1
learning_rate = 0.01

# this is our weight matrix, shape (input_dim, output_dim), randomly initialized

w = tf.Variable(tf.random.uniform(shape=(input_dim, output_dim)))

# this is our bias vector, shape (output_dim,), initialized to zeros
b = tf.Variable(tf.zeros(shape=(output_dim,)))

def compute_predictions(features):
    """Apply the linear model to ``features``.

    Multiplies ``features`` by the module-level weight matrix ``w`` and adds
    the module-level bias ``b``.

    Args:
        features: Input passed as the left operand of ``tf.matmul`` against
            ``w``.

    Returns:
        The result of ``tf.matmul(features, w) + b``.
    """
    projected = tf.matmul(features, w)
    return projected + b

def compute_loss(labels, predictions):
    """Return the mean squared error between ``labels`` and ``predictions``.

    Args:
        labels: Target values.
        predictions: Model outputs to compare against ``labels``.

    Returns:
        The mean over all elements of ``(labels - predictions) ** 2``.
    """
    # The error is the *difference* between targets and outputs. The previous
    # code wrote ``labels = predictions``, which passed an unexpected keyword
    # argument to tf.square instead of subtracting.
    return tf.reduce_mean(tf.square(labels - predictions))


def train_on_batch(x, y):
    """Run one gradient-descent step on a single batch and return its loss.

    Computes predictions and the loss under a ``tf.GradientTape``, then
    updates the module-level variables ``w`` and ``b`` in place by
    subtracting ``learning_rate`` times their gradients.

    Args:
        x: Batch of features, forwarded to ``compute_predictions``.
        y: Batch of labels, forwarded to ``compute_loss``.

    Returns:
        The loss computed for this batch before the parameter update.
    """
    # Only the forward pass needs to be recorded by the tape.
    with tf.GradientTape() as tape:
        predictions = compute_predictions(x)
        loss = compute_loss(y, predictions)

    # Ask for gradients after leaving the tape context so the gradient
    # computation itself is not recorded onto the tape.
    dloss_dw, dloss_db = tape.gradient(loss, [w, b])

    w.assign_sub(learning_rate * dloss_dw)
    b.assign_sub(learning_rate * dloss_db)

    return loss

        

### Keras API

![Spectrum of Keras workflows](https://keras-dev.s3.amazonaws.com/tutorials-img/spectrum-of-workflows.png)

In [40]:
def build_lenet5():
    """Build a small convolutional classifier for 28x28 single-channel input.

    The stack is: two 3x3 ReLU convolutions (32 then 64 filters), 2x2 max
    pooling, dropout (0.25), flatten, a 128-unit ReLU dense layer, dropout
    (0.5) and a 10-unit softmax output.

    Returns:
        An uncompiled ``tf.keras.models.Sequential`` model.
    """
    net = tf.keras.models.Sequential()

    # Layers are created and appended in exactly the order they are applied.
    for layer in (
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ):
        net.add(layer)

    return net

model = build_lenet5()
model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_5 (Dropout)          (None, 128)             

In [36]:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Sequential

In [37]:
def build_alexnet():
    """Build an AlexNet-style convolutional classifier for 224x224 RGB input.

    The model has a convolutional feature extractor (five convolutions, with
    max pooling and batch normalization after the first, the second and the
    last one) followed by two 4096-unit ReLU dense layers with dropout (0.5)
    and a 2-unit softmax output.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # Convolutional feature extractor.
    feature_layers = [
        Conv2D(96, kernel_size=(11, 11), strides=(4, 4), activation='relu', input_shape=(224, 224, 3)),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        BatchNormalization(),

        Conv2D(256, kernel_size=(5, 5), activation='relu'),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        BatchNormalization(),

        Conv2D(384, kernel_size=(3, 3), padding='same', activation='relu'),
        Conv2D(384, kernel_size=(3, 3), padding='same', activation='relu'),
        Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        BatchNormalization(),
    ]

    # Fully connected classification head.
    head_layers = [
        Flatten(),
        Dense(4096, activation='relu'),
        Dropout(0.5),
        Dense(4096, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax'),
    ]

    return Sequential(feature_layers + head_layers)

model = build_alexnet()
model.summary()

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 54, 54, 96)        34944     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 26, 26, 96)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 26, 26, 96)        384       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 22, 22, 256)       614656    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 10, 10, 256)       0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 10, 10, 256)       1024      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 10, 10, 384)     

In [38]:
# def build_alexnet():
#     model = tf.keras.models.Sequential([
#         Conv2D(96, kernel_size=(11, 11), strides=(4,4), activation='relu', input_shape=(224, 224, 3)),
#         MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
#         BatchNormalization(),

#         Conv2D(256, kernel_size=(5, 5), activation='relu'),
#         MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
#         BatchNormalization(),

#         Conv2D(384, kernel_size=(3, 3), padding='same', activation='relu'),
#         Conv2D(384, kernel_size=(3, 3), padding='same', activation='relu'),
#         Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu'),
#         MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
#         BatchNormalization(),

#         Flatten(),
#         Dense(4096, activation='relu'),
#         Dropout(0.5),
#         Dense(4096, activation='relu'),
#         Dropout(0.5),
#         Dense(2, activation='softmax')
#     ])
#     return model

# model = build_alexnet()
# model.summary()

### Callback


In [None]:
class CollectBatchStats(tf.keras.callbacks.Callback):
    """Keras callback that records loss and accuracy after every batch.

    Training batches are appended to ``batch_losses`` / ``batch_acc`` and
    evaluation (validation/test) batches to ``batch_val_losses`` /
    ``batch_val_acc``. After each batch the model's metrics are reset so the
    next recorded values describe that batch alone.
    """

    def __init__(self):
        # Let the base Callback set up its own attributes first.
        super(CollectBatchStats, self).__init__()
        self.batch_losses = []
        self.batch_acc = []
        self.batch_val_losses = []
        self.batch_val_acc = []

    def _record(self, logs, losses, accuracies):
        """Append this batch's loss/accuracy from ``logs``, then reset metrics.

        Raises:
            KeyError: if ``logs`` has no ``'loss'`` entry, or neither an
                ``'acc'`` nor an ``'accuracy'`` entry.
        """
        losses.append(logs['loss'])
        # The log key follows the metric name given to ``model.compile``:
        # metrics=['acc'] logs 'acc', metrics=['accuracy'] logs 'accuracy'.
        accuracies.append(logs['acc'] if 'acc' in logs else logs['accuracy'])
        # reset_metrics: the metrics returned will be only for this batch.
        # If not reset, the metrics are statefully accumulated across batches.
        self.model.reset_metrics()

    def on_train_batch_end(self, batch, logs=None):
        """Record the metrics of the training batch that just finished."""
        self._record(logs, self.batch_losses, self.batch_acc)

    def on_test_batch_end(self, batch, logs=None):
        """Record the metrics of the evaluation batch that just finished."""
        self._record(logs, self.batch_val_losses, self.batch_val_acc)

def plot_stats(training_stats, val_stats, x_label='Training Steps', stats='loss'):
    """Plot a training curve and a validation curve on one figure.

    Args:
        training_stats: Sequence of per-step training values.
        val_stats: Sequence of validation values; it is spread evenly over
            the same x-range as ``training_stats``.
        x_label: Label for the x axis (title-cased before display).
        stats: Name of the plotted quantity, e.g. ``'loss'`` (title-cased
            before display). Also selects the legend position.
    """
    # Pick the legend position from the raw name *before* title-casing it;
    # comparing the title-cased 'Loss' with 'loss' never matched.
    legend_loc = 'upper right' if stats.lower() == 'loss' else 'lower right'
    stats, x_label = stats.title(), x_label.title()
    training_steps = len(training_stats)
    test_steps = len(val_stats)

    plt.figure()
    plt.ylabel(stats)
    plt.xlabel(x_label)
    plt.plot(training_stats, label='Training ' + stats)
    # Validation has fewer points, so stretch them over the training x-range.
    plt.plot(np.linspace(0, training_steps, test_steps), val_stats, label='Validation ' + stats)
    # Anchor the y axis at zero while keeping the automatic upper limit.
    plt.ylim([0, max(plt.ylim())])
    plt.legend(loc=legend_loc)
    plt.show()

batch_stats_callback = CollectBatchStats()

A core principle of Keras is "progressive disclosure of complexity": it's easy to get started, and you can gradually dive into workflows where you write more and more logic from scratch, providing you with complete control.

This applies to both model definition, and model training.

![Model definition: spectrum of workflows](https://keras-dev.s3.amazonaws.com/tutorials-img/model-building-spectrum.png)

![Model training: spectrum of workflows](https://keras-dev.s3.amazonaws.com/tutorials-img/model-training-spectrum.png)

In [2]:
os.getcwd()

'C:\\Users\\HT\\Desktop\\Coderschool\\Mydoing\\Data Science Project'

In [4]:

import builtins
len(dir(builtins))

154

In [5]:
def square(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)

In [6]:
number = list(range(1, 4))
y = map(lambda x: x**2, number)
print(list(y))


[1, 4, 9]


In [None]:
# # Function
# 1. enumerate
# 2. zip
# 3. map


In [None]:
# # Pandas
# 1. concat
# 2. melt
# 3. pivot
# 4. crosstab
# 5.

In [7]:
# building data frame from scratch

country = ['Spain', 'France']
population = ['11', '12']
list_label = ['country', 'population']
list_col = [country, population]

zipped = list(zip(list_label, list_col))
print(zipped)

[('country', ['Spain', 'France']), ('population', ['11', '12'])]


In [8]:
data_dict = dict(zipped)
df = pd.DataFrame(data_dict)
df

Unnamed: 0,country,population
0,Spain,11
1,France,12


In [9]:
se = pd.Series(population)
se

0    11
1    12
dtype: object

In [10]:
type(se)

pandas.core.series.Series

In [11]:

se.unique()

array(['11', '12'], dtype=object)

In [12]:
df.nunique()

country       2
population    2
dtype: int64

In [13]:
df['capital'] = pd.Series(['madrid', 'paris'])
df

Unnamed: 0,country,population,capital
0,Spain,11,madrid
1,France,12,paris


In [14]:

# Broadcasting
df['income'] = 0
df

Unnamed: 0,country,population,capital,income
0,Spain,11,madrid,0
1,France,12,paris,0


In [None]:
# # # plot data
# # fig, ax = plt.subplots()
# ax = plt.figure(figsize=(15, 7))
# # data.plot(kind='type', x='title', y='title')
# # plt.show()

In [16]:
type(df[['population']])

pandas.core.frame.DataFrame

In [None]:
# # filter in pandas

# condition_1 = data['columns'] > 100
# data[condition_1]
# condition_2 = data['columns2'] != 100
# data[condition_1 & condition_2]


In [None]:
# # Filter column based on other columns
# data['column3'][condition_2]

In [None]:
# # transforming data
# def function1(x):
#     return x/2

# data['column4'].apply(function1)
# data['column4'].apply(lambda x: x/2)


In [None]:
# # Create new column
# data['column5'] = data['column1'] + data['column2']


In [17]:
# Pivoting data frame
dic = {'treatment': ['A', 'A', 'B', 'B'],
       'gender': ['F', 'M', 'F', 'M'],
       'response': [10, 45, 5, 9],
       'age': [15, 4, 72, 65],
       }
df = pd.DataFrame(dic)
df

Unnamed: 0,treatment,gender,response,age
0,A,F,10,15
1,A,M,45,4
2,B,F,5,72
3,B,M,9,65


In [18]:
# pivoting
df.pivot(index='treatment', columns='gender', values='response')


gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,45
B,5,9


In [21]:
df.pivot(index='gender', columns= 'age', values='response')


age,4,15,65,72
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
F,,10.0,,5.0
M,45.0,,9.0,
