# Intro Tutorial

## 1. Opening a notebook

1. Open https://filrad.soic.indiana.edu:8080
2. More details about opening the notebook are located at 'Announcements/Access to the notebook server'


TEST ON SERVER


## 2. Cells

The cell-type drop-down menu has several options. The two you will use most are:
1. 'Code' is for running code
2. 'Markdown' is for typing notes like these. [Here](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet) is a tutorial. This is useful for explaining code and graphs when submitting assignments

Press Shift+Enter to run the code in the selected cell.

In [1]:
# The value of a cell's last expression is shown as the cell's output
1+2

3

In [3]:
# An assignment stores the value under a name; it displays no output
a = 3

Variables are saved and can be used in the cells that follow. This is useful if you want to do step-by-step calculations.

In [4]:
# `a` was assigned in an earlier cell and is still defined here
a + 2

5

## 3. Other buttons

1. Save
2. Add a new cell
3. Cut cell
4. Copy cell
5. Paste cell
6. Run cell
7. Stop running cell
8. Restart the kernel
9. Create a new notebook


In [12]:
# import statements
#%matplotlib inline 
#import matplotlib.pyplot as plt 
# Import matplotlib for graphs and visualizations
# lets you plot inside the notebook

import networkx as nx # analysis for networks

import pandas as pd # analysis for csv type data

import numpy as np # analysis for matrices and vectors

import pickle # convenient way to save data

import random
import matplotlib.pyplot as plt

## 4. matplotlib

In [8]:
# Line chart

x = [1, 3, 5, 10]
y = [2, 2.5, 10, 8]

# figsize sets the (width, height) of the figure
fig = plt.figure(figsize=(5, 3))

# 'o-' draws a circle marker at each point and joins the points with a line
plt.plot(x, y, 'o-')

# Axis limits, given as (lower, upper)
plt.xlim(0, 11)
plt.ylim(0, 11)

# Axis labels and their font size
plt.xlabel('X axis', fontsize=12)
plt.ylabel('Y axis', fontsize=12)

# Font size of the tick labels on both axes (major ticks only)
plt.tick_params(axis='both', which='major', labelsize=12)

# Title with a custom size and colour
plt.title('title', fontsize=14, color='r')

# To save the figure to disk, uncomment one of the lines below
# plt.savefig('line-chart.png', bbox_inches='tight', dpi=300)
# plt.savefig('line-chart.pdf', bbox_inches='tight', dpi=300)

Text(0.5,1,'title')

In [9]:
# Bar chart: one bar per (position, height) pair

x = list(range(1, 6))  # bar positions 1..5
y = [5, 10, 4, 8, 6]   # bar heights

plt.bar(x, y)

<Container object of 5 artists>

In [10]:
# Grouped bar chart: two series drawn side by side at each x position

x = np.arange(5)
y1 = [5, 10, 4, 8, 6]
y2 = [6, 9, 5, 9, 5]

w = 0.3       # width of each bar
offset = 0.4  # horizontal shift of the second series relative to the first

# align='center' is spelled out so every bar is centred on the x value it is
# given, whatever the installed matplotlib's default alignment is
plt.bar(x, y1, width=w, color='r', align='center')
plt.bar(x + offset, y2, width=w, align='center')

# The two bars of a pair are centred at x and x + offset, so the midpoint of
# the pair is x + offset / 2 -- that is where the group label belongs
plt.xticks(x + offset / 2, ('A', 'B', 'C', 'D', 'E'))

# Font size of the tick labels on both axes
plt.tick_params(axis='both', which='major', labelsize=16)

In [13]:
# A histogram is plotted much like a bar chart

# Draw 100 random values with random.gauss(0, 1)
x = [random.gauss(0, 1) for _ in range(100)]

# hist() computes the histogram of the values and draws it
plt.hist(x, alpha=0.5)

plt.xlabel('Random value: x', fontsize=12)
plt.ylabel('Histogram', fontsize=12)

Text(0,0.5,'Histogram')

In [14]:
# Scatter plot of two random point clouds

# First cloud: each point is drawn as an (x, y) pair, x from gauss(0, 2) and
# y from gauss(2, 1)
gauss_points = [(random.gauss(0, 2), random.gauss(2, 1)) for _ in range(100)]
x = [px for px, _ in gauss_points]
y = [py for _, py in gauss_points]

plt.scatter(x, y, label='Blue', s=100, alpha=0.5, linewidth=0)

# Second cloud: both coordinates drawn uniformly from [-1, 11]
uniform_points = [(random.uniform(-1, 11), random.uniform(-1, 11)) for _ in range(100)]
x2 = [px for px, _ in uniform_points]
y2 = [py for _, py in uniform_points]

plt.scatter(x2, y2, label='Red', s=100, c='r', alpha=0.5, linewidth=0)

# Legend built from the label= given to each scatter call
plt.legend(loc='upper left', fontsize=14, scatterpoints=1)

<matplotlib.legend.Legend at 0x115f3b908>

In [15]:
# Subplots let you draw several plots in one figure.

fig, axarr = plt.subplots(2, 2, figsize=(6, 6))  # 2 x 2 grid -> 4 plots
plt.tight_layout()

xvals = [0, 1, 2, 3, 4]

# Give each cell of the grid a name that says where it sits
(top_left, top_right), (bottom_left, bottom_right) = axarr

top_left.plot(xvals, [2, 4, 6, 8, 10], 'o')              # markers only
top_right.plot(xvals, [-2, -4, -6, -8, -10], 'r-o')      # red line with circles
bottom_left.hist([1, 1, 1, 2, 3, 4, 4, 5, 5, 5, 5])      # histogram
bottom_right.scatter([1, 2, 3, 1.5, 2.5], [3, 1, 2, 2.5, 4], s=50, marker='s')  # scatter, square markers

<matplotlib.collections.PathCollection at 0x116083550>

In [16]:
# Two plots in one figure that share the same x axis
fig, axarr = plt.subplots(2, figsize=(6, 6), sharex=True)
plt.tight_layout()

# 100 evenly spaced points from -2*pi to 2*pi
xvals = np.linspace(-2 * np.pi, 2 * np.pi, 100)
sinx = np.sin(xvals)
cosx = np.cos(xvals)

sin_axes, cos_axes = axarr

sin_axes.plot(xvals, sinx, c='r', linewidth=5)
sin_axes.set_ylabel('Sin(x)', fontsize=14)

cos_axes.plot(xvals, cosx, 'o-')
cos_axes.set_ylabel('Cos(x)', fontsize=14)

Text(27.125,0.5,'Cos(x)')

## 5. Networks

In [17]:
# Build a small graph by hand and draw it
graph = nx.Graph()

graph.add_node(1)                   # add a single node
graph.add_nodes_from(range(2, 6))   # add nodes 2, 3, 4, 5 in one call

edge_list = [(1, 2), (1, 3), (4, 5), (1, 5)]
graph.add_edges_from(edge_list)     # each tuple joins two nodes

nx.draw_networkx(graph)

In [19]:
# Keyword arguments passed on to the drawing call below
options = dict(node_color='yellow', node_size=500, width=3)

# Random graph: 20 nodes, each possible edge included with probability 0.2
graph = nx.gnp_random_graph(20, 0.2)
nx.draw_networkx(graph, **options)
#plt.savefig("path.png")

In [20]:
# print the graph's nodes, then its edges
print(graph.nodes())
print(graph.edges())

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
[(0, 4), (0, 13), (1, 8), (1, 12), (1, 14), (1, 15), (1, 17), (2, 3), (2, 5), (2, 9), (2, 11), (2, 12), (2, 14), (2, 17), (3, 4), (4, 8), (4, 14), (5, 16), (6, 7), (6, 9), (6, 10), (6, 14), (7, 12), (8, 10), (8, 12), (8, 16), (9, 16), (9, 17), (10, 14), (10, 15), (11, 12), (11, 13), (11, 19), (13, 17), (14, 17)]


In [21]:
# Attach a 'weight' attribute to every edge, set to random.randint(0, 3)

for node_a, node_b in graph.edges():
    edge_data = graph[node_a][node_b]  # attribute dict of this edge
    edge_data['weight'] = random.randint(0, 3)

In [22]:
# read back the 'weight' attribute of every edge, as a dict keyed by edge
nx.get_edge_attributes(graph,'weight')

{(0, 4): 0,
 (0, 13): 0,
 (1, 8): 2,
 (1, 12): 2,
 (1, 14): 1,
 (1, 15): 1,
 (1, 17): 1,
 (2, 3): 2,
 (2, 5): 1,
 (2, 9): 0,
 (2, 11): 2,
 (2, 12): 1,
 (2, 14): 2,
 (2, 17): 1,
 (3, 4): 3,
 (4, 8): 3,
 (4, 14): 1,
 (5, 16): 0,
 (6, 7): 3,
 (6, 9): 1,
 (6, 10): 2,
 (6, 14): 2,
 (7, 12): 2,
 (8, 10): 3,
 (8, 12): 0,
 (8, 16): 2,
 (9, 16): 3,
 (9, 17): 3,
 (10, 14): 2,
 (10, 15): 1,
 (11, 12): 0,
 (11, 13): 2,
 (11, 19): 0,
 (13, 17): 1,
 (14, 17): 2}

In [23]:
DG = nx.DiGraph() # directed graph
# each tuple is (source, target, weight)
DG.add_weighted_edges_from([(1, 2, 0.5), (3, 1, 0.75)])
# NOTE(review): the results of the next two calls are discarded -- they are
# neither printed nor the last expression of the cell, so nothing is shown
# for them. Wrap them in print() if the degrees are meant to be displayed.
DG.out_degree(1, weight='weight')
DG.degree(1, weight='weight')

nx.draw_networkx(DG)

# nodes that node 1 has an edge pointing to
print('Successors =',list(DG.successors(1)))

# nodes that have an edge pointing to node 1
print('Predecessors =',list(DG.predecessors(1)))


Successors = [2]
Predecessors = [3]


## 6. Pandas

In [24]:
# 10 x 4 table of random values with columns named A-D
random_values = np.random.randn(10, 4)
df = pd.DataFrame(random_values, columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,-0.951672,0.208343,-1.490307,-0.240304
1,-2.047679,-0.251076,2.463693,0.519645
2,0.01078,-0.925751,-1.16106,0.896083
3,-0.836987,-1.383007,0.729536,0.684829
4,-1.606003,-0.382521,0.591213,-0.128939
5,0.023545,0.205847,0.189882,0.059222
6,-1.135021,-0.643469,0.988402,0.321417
7,-0.087478,-0.233495,0.343168,-1.555828
8,0.116964,0.412299,0.272079,0.365933
9,-0.632484,0.052837,-0.627665,0.359088


In [25]:
# Load sample data from a CSV file on the web into a pandas DataFrame

SCHOOL_EARNINGS_URL = "https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv"

# set_index makes School the row index and drops it from the columns
df = pd.read_csv(SCHOOL_EARNINGS_URL).set_index('School')
df

Unnamed: 0_level_0,Women,Men,Gap
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MIT,94,152,58
Stanford,96,151,55
Harvard,112,165,53
U.Penn,92,141,49
Princeton,90,137,47
Chicago,78,118,40
Georgetown,94,131,37
Tufts,76,112,36
Yale,79,114,35
Columbia,86,119,33


In [26]:
# select a single column by name; the result is displayed as the cell output
df['Gap']

School
MIT           58
Stanford      55
Harvard       53
U.Penn        49
Princeton     47
Chicago       40
Georgetown    37
Tufts         36
Yale          35
Columbia      33
Duke          31
Dartmouth     30
NYU           27
Notre Dame    27
Cornell       27
Michigan      22
Brown         20
Berkeley      17
Emory         14
UCLA          14
SoCal          9
Name: Gap, dtype: int64

In [27]:
# names of the DataFrame's columns
df.columns

Index(['Women', 'Men', 'Gap'], dtype='object')

In [31]:
# Add a new column computed from two existing ones (row-by-row sum)

df['Total'] = df['Women'] + df['Men']
df

Unnamed: 0_level_0,Women,Men,Gap,Total
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MIT,94,152,58,246
Stanford,96,151,55,247
Harvard,112,165,53,277
U.Penn,92,141,49,233
Princeton,90,137,47,227
Chicago,78,118,40,196
Georgetown,94,131,37,225
Tufts,76,112,36,188
Yale,79,114,35,193
Columbia,86,119,33,205


In [32]:
# Basic statistics: the mean of two columns and the shape of the table

men_mean = df['Men'].mean()
women_mean = df['Women'].mean()

print('Men =', men_mean)
print('Women =', women_mean)
print('Dimensions =', df.shape)  # (rows, columns)

Men = 113.523809524
Women = 81.0952380952
Dimensions = (21, 4)


In [33]:
# Sort the rows by the Total column, largest value first

df.sort_values(by='Total', ascending=False)

Unnamed: 0_level_0,Women,Men,Gap,Total
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Harvard,112,165,53,277
Stanford,96,151,55,247
MIT,94,152,58,246
U.Penn,92,141,49,233
Princeton,90,137,47,227
Georgetown,94,131,37,225
Duke,93,124,31,217
Columbia,86,119,33,205
Dartmouth,84,114,30,198
Chicago,78,118,40,196


In [34]:
# plot the values of the Gap column
df.Gap.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x112aacf98>

In [35]:
# Distinct values in a column
gap_column = df['Gap']
distinct_gaps = gap_column.unique()

print(len(gap_column))     # number of rows
print(distinct_gaps)       # each value listed once
print(len(distinct_gaps))  # number of distinct values

21
[58 55 53 49 47 40 37 36 35 33 31 30 27 22 20 17 14  9]
18


## 7. Numpy

In [36]:
# One-dimensional array built from a Python list
a = np.array([1, 2, 3])
print('type =', type(a))       # type = <class 'numpy.ndarray'>
print('shape =', a.shape)      # shape = (3,)
print('elements =', *a)        # elements = 1 2 3

# Arrays are mutable: assign through an index to change an element
a[0] = 5
print('with new element:', a)  # with new element: [5 2 3]

# Two-dimensional array built from a list of rows
b = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print('New array shape =', b.shape)  # New array shape = (2, 3)
# Index with [row, column]
print('Elements =', b[0, 0], b[0, 1], b[1, 0])  # Elements = 1 2 4

type = <class 'numpy.ndarray'>
shape = (3,)
elements = 1 2 3
with new element: [5 2 3]
New array shape = (2, 3)
Elements = 1 2 4


In [37]:
# Each entry pairs a heading with the matrix produced by one NumPy constructor
examples = [
    ('Zero Matrix', np.zeros((2, 2))),             # matrix of zeros
    ('1 Matrix', np.ones((1, 2))),                 # matrix of 1s
    ('Constant Matrix', np.full((2, 2), 7)),       # every entry is the same constant
    ('Identity Matrix', np.eye(2)),                # identity matrix
    ('Random Matrix', np.random.random((2, 2))),   # random entries
]

for position, (heading, matrix) in enumerate(examples):
    if position > 0:
        print('\n')  # blank lines between consecutive examples
    print(heading)
    print(matrix)

Zero Matrix
[[ 0.  0.]
 [ 0.  0.]]


1 Matrix
[[ 1.  1.]]


Constant Matrix
[[7 7]
 [7 7]]


Identity Matrix
[[ 1.  0.]
 [ 0.  1.]]


Random Matrix
[[ 0.56537522  0.11544463]
 [ 0.24259327  0.50256221]]


In [38]:
# 3 x 4 matrix holding 1..12, filled row by row
a = np.arange(1, 13).reshape(3, 4)
print(a)
print('\n')

# Slicing: rows 0-1 and columns 1-2
middle_block = a[0:2, 1:3]
print('First two rows and Middle two column')
print(middle_block)
print('\n')

# Negative indices count back from the end
bottom_right = a[-1, -1]
print('Last Value')
print('Last Value =', bottom_right)
print('\n')

# Assigning through an index changes the matrix in place
a[0, 0] = 2
print(a)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


First two rows and Middle two column
[[2 3]
 [6 7]]


Last Value
Last Value = 12


[[ 2  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [39]:
# Element-wise operations: each function combines entries at the same position

x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])

# Heading to print, paired with the two-argument NumPy function it demonstrates
binary_operations = [
    ('Addition', np.add),
    ('Subtraction', np.subtract),
    ('Multiplication', np.multiply),
    ('Division', np.divide),
]

for heading, operation in binary_operations:
    print(heading)
    print(operation(x, y))

# Square root takes a single array
print('Square Root')
print(np.sqrt(x))

Addition
[[ 6  8]
 [10 12]]
Subtraction
[[-4 -4]
 [-4 -4]]
Multiplication
[[ 5 12]
 [21 32]]
Division
[[ 0.2         0.33333333]
 [ 0.42857143  0.5       ]]
Square Root
[[ 1.          1.41421356]
 [ 1.73205081  2.        ]]


In [40]:
# Matrix operations

# Dot product of two vectors
x = np.array([9, 10])
y = np.array([11, 12])
print('Dot Product:', x.dot(y))

# x is rebound to a 2 x 2 matrix for the remaining examples
x = np.array([[1, 2], [3, 4]])

print('Sum of all values:', x.sum())
print('Sum of columns:', x.sum(axis=0))  # axis=0 adds down the rows: one total per column
print('Sum of rows:', x.sum(axis=1))     # axis=1 adds across the columns: one total per row

print('Transpose:')
print(x.transpose())

Dot Product: 219
Sum of all values: 10
Sum of columns: [4 6]
Sum of rows: [3 7]
Transpose:
[[1 3]
 [2 4]]


## 8. Pickle

In [41]:
# Make up some data and save it to disk with pickle

animal_classification = {"lion": "mammal", "hawk": "bird", "crocodile": "reptile"}
print(animal_classification)

# Open the file in a with-block so it is closed (and the pickled bytes are
# flushed to disk) as soon as the dump finishes, even if dump() raises
with open("animals.p", "wb") as pickle_file:
    pickle.dump(animal_classification, pickle_file)

{'lion': 'mammal', 'hawk': 'bird', 'crocodile': 'reptile'}


In [42]:
ls

README.md                        crypto_data_vizualization.py
animals.p                        intro_tutorial.ipynb
bin/                             poloniex_cache/
crypto_data_vizualization.ipynb  quandl_cache/


In [43]:
# Load the pickled dictionary back from disk.
# NOTE: only unpickle files you trust -- pickle.load can run arbitrary code
# embedded in the file.
# The with-block closes the file as soon as loading finishes.
with open("animals.p", "rb") as pickle_file:
    animals = pickle.load(pickle_file)
print(animals)

{'lion': 'mammal', 'hawk': 'bird', 'crocodile': 'reptile'}
