# <p style="text-align: Center;">NumPy and Pandas</p>
## <p style="text-align: Center;">University of Wyoming COSC 1010</p>
### <p style="text-align: Center;">Adapted from: *Data Visualization with Python and JavaScript* By Kyran Dale </p>

## <p style="text-align: Center;">Introduction to NumPy</p>

In [34]:
import numpy as np 

a = np.array([1,2,3])

print(a+a)

[2 4 6]


In [2]:
def print_array_details(a):
    print(f"Dimensions: {a.ndim}\nshape: {a.shape}\ndtype: {a.dtype}")

a = np.array([1,2,3,4,5,6,7,8])
print(a)
print_array_details(a)

[1 2 3 4 5 6 7 8]
Dimensions: 1
shape: (8,)
dtype: int64


In [3]:
#first a 2d array from our 1d array 
a2 = a.reshape([2,4])
print(a2)

[[1 2 3 4]
 [5 6 7 8]]


In [4]:
#Then a 3d array from our 1d array 
a3 = a.reshape([2,2,2])
print(a3)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [8]:
print(np.zeros([2,3]))
print()
print(np.ones([3,3]))
print()
print(np.empty([2,2]))

[[0. 0. 0.]
 [0. 0. 0.]]

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]

[[4.68677518e-310 0.00000000e+000]
 [4.68616830e-310 4.68616830e-310]]


In [11]:
print(np.random.random((2,2)))
print(np.linspace(2,10,5)) # five numbers range 2-10
print(np.arange(2,10,2)) # 2 being the step size

[[0.36800437 0.12884575]
 [0.40857825 0.43752508]]
[ 2.  4.  6.  8. 10.]
[2 4 6 8]


In [14]:
print(a[2])
print(a[3:5]) # a slice 
print(a[:4:2]) # slice of 0:4 every second item 
print(a[::-1]) #reversed 

3
[4 5]
[1 3]
[8 7 6 5 4 3 2 1]


In [17]:
ax = np.arange(16,dtype='int32')
ax = ax.reshape([2,2,4])
print(ax)
print(ax[1,1,2])

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]
14


In [20]:
print(a)
print(a*2)#multiply all elements by 2
print(a-2)#subtract 2 from all elements
print(a/2)#divide all numbers
print(a < 4)#check if all numbers are <4

[1 2 3 4 5 6 7 8]
[ 2  4  6  8 10 12 14 16]
[-1  0  1  2  3  4  5  6]
[0.5 1.  1.5 2.  2.5 3.  3.5 4. ]
[ True  True  True False False False False False]


## The `DataFrame`
---
* The first step in a Pandas session is to load some data
* There are various ways to do so, and data can be loaded from multiple sources 
* For now we wills tart with some JSON data
* You can utilize Pandas' `read_json()` method to get a data frame based on the JSON 
* By convention `DataFrame` objects' name starts with `df`

In [5]:
import pandas as pd 
df = pd.read_json('151pokemon.json')

## The `DataFrame`
---
* With the `df` in hand we cans ee and work with the contents
* A quick way to get the row-column structure of a `DF` is to use the `head` method 
* The head method shows the first five elements 

In [6]:
df.head

<bound method NDFrame.head of      number        name                                       types  \
0         1   Bulbasaur   [{'type1': 'Grass'}, {'type2': 'Poison'}]   
1         2     Ivysaur   [{'type1': 'Grass'}, {'type2': 'Poison'}]   
2         3    Venusaur   [{'type1': 'Grass'}, {'type2': 'Poison'}]   
3         4  Charmander      [{'type1': 'Fire'}, {'type2': 'none'}]   
4         5  Charmeleon      [{'type1': 'Fire'}, {'type2': 'none'}]   
..      ...         ...                                         ...   
146     147     Dratini    [{'type1': 'Dragon'}, {'type2': 'none'}]   
147     148   Dragonair    [{'type1': 'Dragon'}, {'type2': 'none'}]   
148     149   Dragonite  [{'type1': 'Dragon'}, {'type2': 'Flying'}]   
149     150      Mewtwo   [{'type1': 'Psychic'}, {'type2': 'none'}]   
150     151         Mew   [{'type1': 'Psychic'}, {'type2': 'none'}]   

                                                 stats  
0    [{'total': '318'}, {'hp': '45'}, {'attack': '4...  
1  

In [9]:
df.columns

Index(['number', 'name', 'types', 'stats'], dtype='object')

In [10]:
df.index

RangeIndex(start=0, stop=151, step=1)

In [20]:
dfx = df.set_index('name')
dfx.loc["Charizard"]

number                                                    6
types              [{'type1': 'Fire'}, {'type2': 'Flying'}]
stats     [{'total': '534'}, {'hp': '78'}, {'attack': '8...
Name: Charizard, dtype: object

In [22]:
df.iloc[5]

number                                                    6
name                                              Charizard
types              [{'type1': 'Fire'}, {'type2': 'Flying'}]
stats     [{'total': '534'}, {'hp': '78'}, {'attack': '8...
Name: 5, dtype: object

In [24]:
name_col = df.name 
type(name_col)

pandas.core.series.Series

In [25]:
name_col

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
146       Dratini
147     Dragonair
148     Dragonite
149        Mewtwo
150           Mew
Name: name, Length: 151, dtype: object

In [26]:
dfn = pd.DataFrame.from_dict([
{'name': 'Albert Einstein', 'category':'Physics'},
{'name': 'Marie Curie', 'category':'Chemistry'},
{'name': 'William Faulkner', 'category':'Literature'}
])

dfn

Unnamed: 0,name,category
0,Albert Einstein,Physics
1,Marie Curie,Chemistry
2,William Faulkner,Literature


In [27]:
s = pd.Series([1,2,3,4]) #automatically assigns int indices 
s

0    1
1    2
2    3
3    4
dtype: int64

In [28]:
# Alternatively can specify indices 
s = pd.Series([1,2,3,4],index=['a','b','c','d'])
s

a    1
b    2
c    3
d    4
dtype: int64

In [29]:
# A dict can be used to specify data and indices 

s = pd.Series({'a':1,'b':2,'c':3})
s

a    1
b    2
c    3
dtype: int64

In [30]:
# Finally a scalar can be passed with indices 
pd.Series(9,{'a','b','c'})

b    9
c    9
a    9
dtype: int64