In [1]:


# # Numpy Introduction
# ## numpy arrays

import numpy as np

# Creating a 1-D array
arr = np.array([1, 3, 4, 5, 6])
print(arr)

# Checking the shape and data type of the array
print(arr.shape)
print(arr.dtype)

# Creating an array with mixed data types
arr = np.array([1, 'st', 'er', 3])
print(arr.dtype)

# Summing the array (will raise an error due to mixed types)
try:
    print(np.sum(arr))
except TypeError as e:
    print(e)

# ### Creating arrays
arr = np.array([[1, 2, 3], [2, 4, 6], [8, 8, 8]])
print(arr.shape)
print(arr)

# Creating arrays of zeros, ones, identity, and random numbers
arr = np.zeros((2, 4))
print(arr)

arr = np.ones((2, 4))
print(arr)

arr = np.identity(3)
print(arr)

arr = np.random.randn(3, 4)
print(arr)

# Reading data from a string buffer
from io import BytesIO
b = BytesIO(b"2,23,33\n32,42,63.4\n35,77,12")
arr = np.genfromtxt(b, delimiter=",")
print(arr)

# ### Accessing array elements
# #### Simple indexing
print(arr[1])

# Creating and reshaping an array
arr = np.arange(12).reshape(2, 2, 3)
print(arr)
print(arr[0])

# Slicing the array
arr = np.arange(10)
print(arr[5:])
print(arr[5:8])
print(arr[:-5])

arr = np.arange(12).reshape(2, 2, 3)
print(arr)
print(arr[1:2])

arr = np.arange(27).reshape(3, 3, 3)
print(arr)
print(arr[:, :, 2])
print(arr[..., 2])

# #### Advanced Indexing
arr = np.arange(9).reshape(3, 3)
print(arr)
print(arr[[0, 1, 2], [1, 0, 0]])

# ##### Boolean Indexing
cities = np.array(["delhi", "bangalore", "mumbai", "chennai", "bhopal"])
city_data = np.random.randn(5, 3)
print(city_data)
print(city_data[cities == "delhi"])
print(city_data[city_data > 0])
city_data[city_data > 0] = 0
print(city_data)

# #### Operations on arrays
arr = np.arange(15).reshape(3, 5)
print(arr)
print(arr + 5)
print(arr * 2)

# Broadcasting example
arr1 = np.arange(15).reshape(5, 3)
arr2 = np.arange(5).reshape(5, 1)
print(arr1)
print(arr2)
print(arr2 + arr1)

# Element-wise operations
arr1 = np.random.randn(5, 3)
print(arr1)
print(np.modf(arr1))

# #### Linear algebra using numpy
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
B = np.array([[9, 8, 7], [6, 5, 4], [1, 2, 3]])
print(A.dot(B))

A = np.arange(15).reshape(3, 5)
print(A.T)
print(np.linalg.svd(A))

a = np.array([[7, 5, -3], [3, -5, 2], [5, 3, -7]])
b = np.array([16, -8, 0])
x = np.linalg.solve(a, b)
print(x)
print(np.allclose(np.dot(a, x), b))

# # Pandas
# ## Data frames

import pandas as pd
d = [{'city': 'Delhi', "data": 1000},
     {'city': 'Bangalore', "data": 2000},
     {'city': 'Mumbai', "data": 1000}]
df = pd.DataFrame(d)
print(df)

# ### Reading in data
city_data = pd.read_csv(filepath_or_buffer='simplemaps-worldcities-basic.csv')
print(city_data.head(n=10))
print(city_data.tail())

series_es = city_data.lat
print(type(series_es))
print(series_es[1:10:2])
print(series_es[:7])
print(series_es[:-7315])
print(city_data[:7])
print(city_data.iloc[:5, :4])

city_greater_10mil = city_data[city_data['pop'] > 10000000]
city_greater_10mil.rename(columns={'pop': 'population'}, inplace=True)
print(city_greater_10mil.where(city_greater_10mil.population > 15000000))

df = pd.DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'])
print(df.values)
df.iloc[4, 2] = np.nan
print(df)
print(df.fillna(0))

columns_numeric = ['lat', 'lng', 'pop']
print(city_data[columns_numeric].mean())
print(city_data[columns_numeric].sum())
print(city_data[columns_numeric].count())
print(city_data[columns_numeric].median())
print(city_data[columns_numeric].quantile(0.8))
print(city_data[columns_numeric].sum(axis=1).head())
print(city_data[columns_numeric].describe())

city_data1 = city_data.sample(3)
city_data2 = city_data.sample(3)
city_data_combine = pd.concat([city_data1, city_data2])
print(city_data_combine)

df1 = pd.DataFrame({'col1': ['col10', 'col11', 'col12', 'col13'],
                    'col2': ['col20', 'col21', 'col22', 'col23'],
                    'col3': ['col30', 'col31', 'col32', 'col33'],
                    'col4': ['col40', 'col41', 'col42', 'col43']},
                   index=[0, 1, 2, 3])
print(df1)

df4 = pd.DataFrame({'col2': ['col22', 'col23', 'col26', 'col27'],
                    'Col4': ['Col42', 'Col43', 'Col46', 'Col47'],
                    'col6': ['col62', 'col63', 'col66', 'col67']},
                   index=[2, 3, 6, 7])
print(pd.concat([df1, df4], axis=1))

country_data = city_data[['iso3', 'country']].drop_duplicates()
print(country_data.shape)
print(country_data.head())

del city_data['country']
print(city_data.merge(country_data, 'inner').head())

# # Scikit-learn

from sklearn import datasets
diabetes = datasets.load_diabetes()
X = diabetes.data[:10]
y = diabetes.target
print(X[:5])
print(y[:10])

# ## Scikit example regression
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

diabetes = datasets.load_diabetes()
X_train = diabetes.data[:310]
y_train = diabetes.target[:310]

X_test = diabetes.data[310:]
y_test = diabetes.target[310:]

lasso = Lasso(random_state=0)
alphas = np.logspace(-4, -0.5, 30)

estimator = GridSearchCV(lasso, param_grid=dict(alpha=alphas))
estimator.fit(X_train, y_train)

print(estimator.best_score_)
print(estimator.best_estimator_)
print(estimator.predict(X_test))

# ## Deep Learning Frameworks

# ### Theano example 
import theano.tensor as T
from theano import function

x = T.dscalar('x')
y = T.dscalar('y')
z = x + y

f = function([x, y], z)
print(f(8, 2))

# ### Tensorflow example
import tensorflow as tf

hello = tf.constant('Hello, TensorFlow!')
sess = tf.Session()
print(sess.run(hello))

# ### Building a neural network model with Keras
from sklearn.datasets import load_breast_cancer
from keras.models import Sequential
from keras.layers import Dense, Dropout

cancer = load_breast_cancer()

X_train = cancer.data[:340]
y_train = cancer.target[:340]

X_test = cancer.data[340:]
y_test = cancer.target[340:]

model = Sequential()
model.add(Dense(15, input_dim=30, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=20, batch_size=50)

predictions = model.predict_classes(X_test)

from sklearn import metrics

print('Accuracy:', metrics.accuracy_score(y_true=y_test, y_pred=predictions))
print(metrics.classification_report(y_true=y_test, y_pred=predictions))

# ### The power of deep learning models
model = Sequential()
model.add(Dense(15, input_dim=30, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=20, batch_size=50)

predictions = model.predict_classes(X_test)

print('Accuracy:', metrics.accuracy_score(y_true=y_test, y_pred=predictions))
print(metrics.classification_report(y_true=y_test, y_pred=predictions))


[1 3 4 5 6]
(5,)
int32
<U11
ufunc 'add' did not contain a loop with signature matching types (dtype('<U11'), dtype('<U11')) -> None
(3, 3)
[[1 2 3]
 [2 4 6]
 [8 8 8]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[[ 1.30350891  0.00739411  1.79921158  0.02767476]
 [-1.51160379 -0.00325587 -1.29241368  0.68851874]
 [ 0.51909242 -0.01609646  0.6505724  -1.50184529]]
[[ 2.  23.  33. ]
 [32.  42.  63.4]
 [35.  77.  12. ]]
[32.  42.  63.4]
[[[ 0  1  2]
  [ 3  4  5]]

 [[ 6  7  8]
  [ 9 10 11]]]
[[0 1 2]
 [3 4 5]]
[5 6 7 8 9]
[5 6 7]
[0 1 2 3 4]
[[[ 0  1  2]
  [ 3  4  5]]

 [[ 6  7  8]
  [ 9 10 11]]]
[[[ 6  7  8]
  [ 9 10 11]]]
[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]
[[ 2  5  8]
 [11 14 17]
 [20 23 26]]
[[ 2  5  8]
 [11 14 17]
 [20 23 26]]
[[0 1 2]
 [3 4 5]
 [6 7 8]]
[1 3 6]
[[-0.22380098 -2.04299191 -0.00360294]
 [-0.05688469 -1.71863616 -0.17777861]
 [ 0.3

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_greater_10mil.rename(columns={'pop': 'population'}, inplace=True)


[[ 0.03807591  0.05068012  0.06169621  0.02187239 -0.0442235  -0.03482076
  -0.04340085 -0.00259226  0.01990749 -0.01764613]
 [-0.00188202 -0.04464164 -0.05147406 -0.02632753 -0.00844872 -0.01916334
   0.07441156 -0.03949338 -0.06833155 -0.09220405]
 [ 0.08529891  0.05068012  0.04445121 -0.00567042 -0.04559945 -0.03419447
  -0.03235593 -0.00259226  0.00286131 -0.02593034]
 [-0.08906294 -0.04464164 -0.01159501 -0.03665608  0.01219057  0.02499059
  -0.03603757  0.03430886  0.02268774 -0.00936191]
 [ 0.00538306 -0.04464164 -0.03638469  0.02187239  0.00393485  0.01559614
   0.00814208 -0.00259226 -0.03198764 -0.04664087]]
[151.  75. 141. 206. 135.  97. 138.  63. 110. 310.]
0.4617085978039121
Lasso(alpha=0.07880462815669913, random_state=0)
[199.94087733 178.15662017 123.03143905 212.48942817 171.72456577
 118.01035679 201.00001815 170.22184202 163.33982068 183.90802313
 190.88042058 278.2331978  288.52690865 233.65699315 206.83385406
 227.76704035 156.62863347 222.71036088 187.99209072 104

ModuleNotFoundError: No module named 'theano'