### 1. Import Libraries

In [2]:
# 'generic import' of math module
import math
math.sqrt(25)

5.0

In [3]:
# import a function
from math import sqrt
sqrt(25) # no longer have to reference the module

5.0

In [4]:
# import multiple functions at once
from math import cos, floor


In [5]:
# import all functions in a module (generally discouraged)
# from os import *

# define an alias
import numpy as np

In [8]:
# show all functions in math module
content = dir(math)
print(content)

['__doc__', '__loader__', '__name__', '__package__', '__spec__', 'acos', 'acosh', 'asin', 'asinh', 'atan', 'atan2', 'atanh', 'ceil', 'copysign', 'cos', 'cosh', 'degrees', 'e', 'erf', 'erfc', 'exp', 'expm1', 'fabs', 'factorial', 'floor', 'fmod', 'frexp', 'fsum', 'gamma', 'gcd', 'hypot', 'inf', 'isclose', 'isfinite', 'isinf', 'isnan', 'ldexp', 'lgamma', 'log', 'log10', 'log1p', 'log2', 'modf', 'nan', 'pi', 'pow', 'radians', 'remainder', 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'tau', 'trunc']


### 2. Basic Operations

In [14]:
# Numbers
print(10 + 4) # add (returns 14)

print(10 - 4) # subtract (returns 6)

print(10 * 4) # multiply (returns 40)

print(10 ** 4) # exponent (returns 10000)

print(int(10 /4)) # divide (returns 2 because type is 'int')

print(10 / float(4)) # divide (returns 2.5)

print(5 % 4) # modulo (returns 1) - also known as the remainder

print(10 / 4) # true division (returns 2.5)

print(10 // 4) # floor division (returns 2)

14
6
40
10000
2
2.5
1
2.5
2


In [30]:
# Boolean operations
# comparisons (these return True)

print(5 > 3)
print(5 >= 3)
print(5 != 3)
print(5 == 5)

# boolean operations (these return True)
print(5 > 3 and 6 > 3)
print(5 > 3 or 5 < 3)
print(not False)
print(False or not False and True) # evaluation order: not, and, or

True
True
True
True
True
True
True
True


### 3. Data Types

In [33]:
# determine the type of an object

type(2) # returns 'int'
type(2.0) # returns 'float'
type('two') # returns 'str'
type(True) # returns 'bool'
type(None) # returns 'NoneType'

NoneType

In [36]:
# check if an object is of a given type

isinstance(2.0, int) # returns False
isinstance(2.0, (int, float)) # returns True

True

In [37]:
# convert an object to a given type

float(2)
int(2.9)
str(2.9)

'2.9'

In [38]:
# zero, None, and empty containers are converted to False

bool(0)
bool(None)
bool('') # empty string
bool([]) # empty list
bool({}) # empty dictionary

False

In [39]:
# non-empty containers and non-zeros are converted to True

bool(2)
bool('two')
bool([2])

True

### 4. List

Different objects categorized along a certain ordered sequence, lists are ordered, iterable, mutable
(adding or removing objects changes the list size), can contain multiple data types

In [40]:
# create an empty list (two ways)

empty_list = []
empty_list = list()

In [74]:
# create a list
simpsons = ['homer', 'marge', 'bart']

# examine a list
simpsons[0] # print element 0 ('homer')
len(simpsons) # returns the length (3)

3

In [75]:
simpsons

['homer', 'marge', 'bart']

In [76]:
# modify a list (does not return the list)

simpsons.append('lisa') # append element to end
simpsons.extend(['itchy', 'scratchy']) # append multiple elements to end

simpsons.insert(0, 'maggie') # insert element at index 0 (shifts everything␣˓→right)


In [77]:
simpsons

['maggie', 'homer', 'marge', 'bart', 'lisa', 'itchy', 'scratchy']

In [78]:
simpsons.remove('bart') # searches for first instance and removes it
simpsons.pop(0) # removes element 0 and returns it



'maggie'

In [79]:
simpsons

['homer', 'marge', 'lisa', 'itchy', 'scratchy']

In [82]:
del simpsons[0] # removes element 0 (does not return it)
simpsons[0] = 'krusty' # replace element 0

In [84]:
# concatenate lists (slower than 'extend' method)
neighbors = simpsons + ['ned','rod','todd']

In [85]:
neighbors

['krusty', 'itchy', 'scratchy', 'ned', 'rod', 'todd']

In [86]:
# find elements in a list

simpsons.count('lisa') # counts the number of instances
simpsons.index('itchy') # returns index of first instance

1

In [100]:
# list slicing [start:end:stride]

weekdays = ['mon','tues','wed','thurs','fri']
weekdays[0] # element 0
weekdays[0:3] # elements 0, 1, 2
weekdays[:3] # elements 0, 1, 2
weekdays[3:] # elements 3, 4
weekdays[-1] # last element (element 4)
weekdays[::2] # every 2nd element (0, 2, 4)
weekdays[::-1] # backwards (4, 3, 2, 1, 0)

# alternative method for returning the list backwards
list(reversed(weekdays))

['fri', 'thurs', 'wed', 'tues', 'mon']

In [105]:
# sort a list in place (modifies but does not return the list)

simpsons.sort()
simpsons.sort(reverse=True) # sort in reverse
simpsons.sort(key=len) # sort by a key

In [108]:
# return a sorted list (but does not modify the original list)

sorted(simpsons)
sorted(simpsons, reverse=True)
sorted(simpsons, key=len)

['itchy', 'krusty', 'scratchy']

In [111]:
# create a second reference to the same list

num = [1, 2, 3]
same_num = num
same_num[0] = 0 # modifies both 'num' and 'same_num'

num

In [117]:
# copy a list (three ways)

new_num = num.copy()
new_num = num[:]
new_num = list(num)

### 5. Tuples

Like lists, but their size cannot change: ordered, iterable, immutable, can contain multiple data
types

In [130]:
# create a tuple

digits = (0, 1, 'two') # create a tuple directly
digits = tuple([0, 1, 'two']) # create a tuple from a list
zero = (0,) # trailing comma is required to indicate it's a tuple

In [131]:
# examine a tuple
digits[2] # returns 'two'
len(digits) # returns 3
digits.count(0) # counts the number of instances of that value (1)
digits.index(1) # returns the index of the first instance of that value (1)

# elements of a tuple cannot be modified
# digits[2] = 2 # throws an error

1

In [133]:
# concatenate tuples
digits = digits + (3, 4)
# create a single tuple with elements repeated (also works with lists)
(3, 4) * 2 # returns (3, 4, 3, 4)

# tuple unpacking
bart = ('male', 10, 'simpson') # create a tuple

### 6. Strings

A sequence of characters, they are iterable, immutable

In [138]:
# create a string
s = str(42) # convert another data type into a string
s = 'I like Data Science'

# examine a string
s[0] # returns 'I'
len(s) # returns 19

# string slicing like lists
s[:6] # returns 'I like'

s[7:] # returns 'Data Science'
s[-1] # returns 'e'

'e'

In [150]:
# basic string methods (does not modify the original string)
s.lower() # returns 'i like data science'
s.upper() # returns 'I LIKE DATA SCIENCE'

s.startswith('I') # returns True
s.endswith('Science') # returns True

s.isdigit() # returns False (returns True if every character in the string is a␣˓→digit)

s.find('like') # returns index of first occurrence (2)
s.find('hate') # returns -1 since not found
s.replace('like','love') # replaces all instances of 'like' with 'love'


'I love Data Science'

In [158]:
# split a string into a list of substrings separated by a delimiter

s.split(' ') # returns ['I','like','Data','Science']
s.split() # same thing
s2 = 'a, an, the'
s2.split(',') # returns ['a',' an',' the']

['a', ' an', ' the']

In [167]:
# join a list of strings into one string using a delimiter
stooges = ['larry','curly','moe'] 
' '.join(stooges) # returns 'larry curly moe'

# concatenate strings
s3 = 'The meaning of life is'
s4 = '42'
s3 + ' ' + s4 # returns 'The meaning of life is 42'
s3 + ' ' + str(42) # same thing

'The meaning of life is 42'

In [168]:
# remove whitespace from start and end of a string

s5 = ' ham and cheese '
s5.strip() # returns 'ham and cheese'

'ham and cheese'

In [172]:
# string substitutions: all of these return 'raining cats and dogs'
'raining %s and %s' % ('cats','dogs') # old way
'raining {} and {}'.format('cats','dogs') # new way
'raining {arg1} and {arg2}'.format(arg1='cats',arg2='dogs') # named arguments

# string formatting
# more examples: http://mkaz.com/2012/10/10/python-string-format/
'pi is {:.2f}'.format(3.14159) # returns 'pi is 3.14'

'pi is 3.14'

### String 2/2

In [173]:
print('first line\nsecond line')

first line
second line


In [174]:
# raw strings treat backslashes as literal characters
print(r'first line\nfirst line')

first line\nfirst line


In [177]:
#sequece of bytes are not strings, should be decoded before some operations
s = b'first line\nsecond line'
print(s)
print(s.decode('utf-8').split())

b'first line\nsecond line'
['first', 'line', 'second', 'line']


### 7. Dictionaries

Dictionaries are structures which can contain multiple data types, and is ordered with key-value
pairs: for each (unique) key, the dictionary outputs one value. Keys can be strings, numbers, or
tuples, while the corresponding values can be any Python object. Dictionaries are: unordered,
iterable, mutable

In [193]:
# create an empty dictionary (two ways)
empty_dict = {}
empty_dict = dict()

In [194]:
# create a dictionary (two ways)
family = {'dad':'homer', 'mom':'marge', 'size':6}
family = dict(dad='homer', mom='marge', size=6)

In [195]:
family

{'dad': 'homer', 'mom': 'marge', 'size': 6}

In [196]:
# convert a list of tuples into a dictionary
list_of_tuples = [('dad','homer'), ('mom','marge'), ('size', 6)]
family = dict(list_of_tuples)

In [197]:
# examine a dictionary
family['dad'] # returns 'homer'
len(family) # returns 3

family.keys() # returns list: ['dad', 'mom', 'size']
family.values() # returns list: ['homer', 'marge', 6]
family.items() # returns list of tuples: # [('dad', 'homer'), ('mom', 'marge'), ('size', 6)]

'mom' in family # returns True
'marge' in family # returns False (only checks keys)

False

In [198]:
# modify a dictionary (does not return the dictionary)
family['cat'] = 'snowball' # add a new entry
family['cat'] = 'snowball ii' # edit an existing entry

del family['cat'] # delete an entry
family['kids'] = ['bart', 'lisa'] # value can be a list
family.pop('dad') # removes an entry and returns the value ('homer')
family.update({'baby':'maggie', 'grandpa':'abe'}) # add multiple entries

In [199]:
# accessing values more safely with 'get'
family['mom'] # returns 'marge'
family.get('mom') # same thing
try:
    family['grandma'] # throws an error
except KeyError as e:
    print("Error", e)

family.get('grandma') # returns None
family.get('grandma', 'not found') # returns 'not found' (the default)

# accessing a list element within a dictionary
family['kids'][0] # returns 'bart'
family['kids'].remove('lisa') # removes 'lisa'

# string substitution using a dictionary
'youngest child is %(baby)s' % family # returns 'youngest child is maggie'

Error 'grandma'


'youngest child is maggie'

### 8. Sets

Like dictionaries, but with unique keys only (no corresponding values). They are: unordered, iterable,
mutable, can contain multiple data types made up of unique elements (strings, numbers, or tuples)

In [201]:
# create an empty set
empty_set = set()

In [206]:
# create a set
languages = {'python', 'r', 'java'} # create a set directly
snakes = set(['cobra', 'viper', 'python']) # create a set from a list

# examine a set
len(languages) # returns 3
'python' in languages # returns True

True

In [207]:
# set operations
languages & snakes # returns intersection: {'python'}
languages | snakes # returns union: {'cobra', 'r', 'java', 'viper', 'python'}
languages - snakes # returns set difference: {'r', 'java'}
snakes - languages # returns set difference: {'cobra', 'viper'}

{'cobra', 'viper'}

In [208]:
# modify a set (does not return the set)
languages.add('sql') # add a new element
languages.add('r') # try to add an existing element (ignored, no error)
languages.remove('java') # remove an element
try:
    languages.remove('c') # try to remove a non-existing element (throws an error)
except KeyError as e:
    print("Error", e)
languages.discard('c') # removes an element if present, but ignored otherwise
languages.pop() # removes and returns an arbitrary element
languages.clear() # removes all elements
languages.update('go', 'spark') # add multiple elements (can also pass a list or set)

# get a sorted list of unique elements from a list
sorted(set([9, 0, 2, 1, 0])) # returns [0, 1, 2, 9]

Error 'c'


'python'

### 9. Execution control statements

#### Conditional statements

In [210]:
x = 3
# if statement
if x > 0:
    print('positive')
    
# if/else statement
if x > 0:
    print('positive')
else:
    print('zero or negative')

    # if/elif/else statement
if x > 0:
    print('positive')
elif x == 0:
    print('zero')
else:
    print('negative')
    
# single-line if statement (sometimes discouraged)
if x > 0: print('positive')
    
# single-line if/else statement (sometimes discouraged)
# known as a 'ternary operator'
'positive' if x > 0 else 'zero or negative'

positive
positive
positive
positive


'positive'

#### Loops

In [212]:
# range returns a list of integers
range(0, 3) # returns [0, 1, 2]: includes first value but excludes second value
range(3) # same thing: starting at zero is the default
range(0, 5, 2) # returns [0, 2, 4]: third argument specifies the 'stride'

# for loop (not recommended)
fruits = ['apple', 'banana', 'cherry']
for i in range(len(fruits)):
    print(fruits[i].upper())
    
# alternative for loop (recommended style)
for fruit in fruits:
    print(fruit.upper())
    
# use range when iterating over a large sequence to avoid actually creating the integer␣˓→list in memory
for i in range(10**6):
    pass

# iterate through two things at once (using tuple unpacking)
family = {'dad':'homer', 'mom':'marge', 'size':6}
for key, value in family.items():
    print(key, value)

# use enumerate if you need to access the index value within the loop
for index, fruit in enumerate(fruits):
    print(index, fruit)

# for/else loop
for fruit in fruits:
    if fruit == 'banana':
        print("Found the banana!")
        break # exit the loop and skip the 'else' block
    else:
        # this block executes ONLY if the for loop completes without hitting 'break'
        print("Can't find the banana")
        
# while loop
count = 0
while count < 5:
    print("This will print 5 times")
    count += 1 # equivalent to 'count = count + 1'

APPLE
BANANA
CHERRY
APPLE
BANANA
CHERRY
dad homer
mom marge
size 6
0 apple
1 banana
2 cherry
Can't find the banana
Found the banana!
This will print 5 times
This will print 5 times
This will print 5 times
This will print 5 times
This will print 5 times


### 10. Functions

In [215]:
# define a function with no arguments and no return values
def print_text():
    print('this is text')
    
# call the function
print_text()

this is text
3
3


In [216]:
# define a function with one argument and no return values
def print_this(x):
    print(x)
    
# call the function
print_this(3) # prints 3
n = print_this(3) # prints 3, but doesn't assign 3 to n
                  # because the function has no return statement

3
3


In [217]:
# define a function with one argument and one return value
def square_this(x):
    return x ** 2

In [220]:
# include an optional docstring to describe the effect of a function
def square_this(x):
    """Return the square of a number."""
    return x ** 2

# call the function
square_this(3) # prints 9
var = square_this(3) # assigns 9 to var, but does not print 9

9

In [221]:
# default arguments
def power_this(x, power=2):
    return x ** power

power_this(2) # 4
power_this(2, 3) # 8

8

In [222]:
# use 'pass' as a placeholder if you haven't written the function body
def stub():
    pass

# return two values from a single function
def min_max(nums):
    return min(nums), max(nums)

In [223]:
# return values can be assigned to a single variable as a tuple
nums = [1, 2, 3]
min_max_num = min_max(nums) # min_max_num = (1, 3)

In [224]:
# return values can be assigned into multiple variables using tuple unpacking
min_num, max_num = min_max(nums) # min_num = 1, max_num = 3

### 11. NUMPY: ARRAYS AND MATRICES

#### Create arrays

Create ndarrays from lists. note: every element must be the same type (will be converted if possible)

In [227]:
import numpy as np
data1 = [1, 2, 3, 4, 5] # list
arr1 = np.array(data1) # 1d array

data2 = [range(1, 5), range(5, 9)] # list of lists
arr2 = np.array(data2) # 2d array

arr2.tolist() # convert array back to list

[[1, 2, 3, 4], [5, 6, 7, 8]]

In [233]:
# create special arrays

np.zeros(10)
np.zeros((3, 6))
np.ones(10)

print(np.linspace(0, 1, 5)) # 0 to 1 (inclusive) with 5 points
np.logspace(0, 3, 4) # 10^0 to 10^3 (inclusive) with 4 points

[0.   0.25 0.5  0.75 1.  ]


array([   1.,   10.,  100., 1000.])

In [234]:
# arange is like range, except it returns an array (not a list)
int_array = np.arange(5)
float_array = int_array.astype(float)

#### Examining arrays

In [235]:
arr1.dtype # float64
arr2.dtype # int32
arr2.ndim # 2

arr2.shape # (2, 4) - axis 0 is rows, axis 1 is columns
arr2.size # 8 - total number of elements
len(arr2) # 2 - size of first dimension (aka axis)

2

#### Reshaping

In [236]:
arr = np.arange(10, dtype=float).reshape((2, 5))
print(arr.shape)
print(arr.reshape(5, 2))

(2, 5)
[[0. 1.]
 [2. 3.]
 [4. 5.]
 [6. 7.]
 [8. 9.]]


#### PANDAS: DATA MANIPULATION

In [239]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### Create DataFrame

In [241]:
columns = ['name', 'age', 'gender', 'job']
user1 = pd.DataFrame([['alice', 19, "F", "student"],['john', 26, "M", "student"]],columns=columns)
user2 = pd.DataFrame([['eric', 22, "M", "student"],['paul', 58, "F", "manager"]],columns=columns)
user3 = pd.DataFrame(dict(name=['peter', 'julie'],age=[33, 44], gender=['M', 'F'],
job=['engineer', 'scientist']))
print(user3)

   age gender        job   name
0   33      M   engineer  peter
1   44      F  scientist  julie


#### Concatenate DataFrame

In [242]:
user1.append(user2)
users = pd.concat([user1, user2, user3])
print(users)

   age gender        job   name
0   19      F    student  alice
1   26      M    student   john
0   22      M    student   eric
1   58      F    manager   paul
0   33      M   engineer  peter
1   44      F  scientist  julie


#### Join DataFrame

In [243]:
user4 = pd.DataFrame(dict(name=['alice', 'john', 'eric', 'julie'],
height=[165, 180, 175, 171]))
print(user4)

   height   name
0     165  alice
1     180   john
2     175   eric
3     171  julie


In [245]:
# Use intersection of keys from both frames
merge_inter = pd.merge(users, user4, on="name")
print(merge_inter)

   age gender        job   name  height
0   19      F    student  alice     165
1   26      M    student   john     180
2   22      M    student   eric     175
3   44      F  scientist  julie     171


In [246]:
# Use union of keys from both frames
users = pd.merge(users, user4, on="name", how='outer')
print(users)

   age gender        job   name  height
0   19      F    student  alice   165.0
1   26      M    student   john   180.0
2   22      M    student   eric   175.0
3   58      F    manager   paul     NaN
4   33      M   engineer  peter     NaN
5   44      F  scientist  julie   171.0


### Summarizing

In [247]:
# examine the users data
users # print the first 30 and last 30 rows
type(users) # DataFrame
users.head() # print the first 5 rows
users.tail() # print the last 5 rows
users.index # "the index" (aka "the labels")
users.columns # column names (which is "an index")
users.dtypes # data types of each column
users.shape # number of rows and columns
users.values # underlying numpy array
users.info() # concise summary (includes memory usage as of pandas 0.15.0

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6 entries, 0 to 5
Data columns (total 5 columns):
age       6 non-null int64
gender    6 non-null object
job       6 non-null object
name      6 non-null object
height    4 non-null float64
dtypes: float64(1), int64(1), object(3)
memory usage: 288.0+ bytes


#### Columns Selection

In [248]:
users['gender'] # select one column
type(users['gender']) # Series
users.gender # select one column using the DataFrame
# select multiple columns
users[['age', 'gender']] # select two columns
my_cols = ['age', 'gender'] # or, create a list...
users[my_cols] # ...and use that list to select columns
type(users[my_cols]) # DataFrame

pandas.core.frame.DataFrame

In [250]:
# Rows selection (basic) iloc is strictly integer position based

df = users.copy()
df.iloc[0] # first row
df.iloc[0, 0] # first item of first row
df.iloc[0, 0] = 55
for i in range(users.shape[0]):
    row = df.iloc[i]
    row.age *= 100 # setting a copy, and not the original frame data.
print(df) # df is not modified

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


   age gender        job   name  height
0   55      F    student  alice   165.0
1   26      M    student   john   180.0
2   22      M    student   eric   175.0
3   58      F    manager   paul     NaN
4   33      M   engineer  peter     NaN
5   44      F  scientist  julie   171.0


In [251]:
### Rows selection (filtering)

users[users.age < 20] # only show users with age < 20
young_bool = users.age < 20 # or, create a Series of booleans...
young = users[young_bool] # ...and use that Series to filter rows

In [252]:
users[users.age < 20].job # select one column from the filtered results
print(young)

   age gender      job   name  height
0   19      F  student  alice   165.0


In [253]:
users[users.age < 20][['age', 'job']] # select multiple columns
users[(users.age > 20) & (users.gender == 'M')] # use multiple conditions
users[users.job.isin(['student', 'engineer'])] # filter specific values

Unnamed: 0,age,gender,job,name,height
0,19,F,student,alice,165.0
1,26,M,student,john,180.0
2,22,M,student,eric,175.0
4,33,M,engineer,peter,


### Sorting

In [254]:
df = users.copy()
df.age.sort_values() # only works for a Series
df.sort_values(by='age') # sort rows by a specific column
df.sort_values(by='age', ascending=False) # use descending order instead
df.sort_values(by=['job', 'age']) # sort by multiple columns
df.sort_values(by=['job', 'age'], inplace=True) # modify df
print(df)

   age gender        job   name  height
4   33      M   engineer  peter     NaN
3   58      F    manager   paul     NaN
5   44      F  scientist  julie   171.0
0   19      F    student  alice   165.0
2   22      M    student   eric   175.0
1   26      M    student   john   180.0


### Descriptive statistics

In [255]:
print(df.describe())

             age      height
count   6.000000    4.000000
mean   33.666667  172.750000
std    14.895189    6.344289
min    19.000000  165.000000
25%    23.000000  169.500000
50%    29.500000  173.000000
75%    41.250000  176.250000
max    58.000000  180.000000


In [256]:
# Summarize all columns
print(df.describe(include='all'))
print(df.describe(include=['object'])) # limit to one (or more) types

              age gender      job   name      height
count    6.000000      6        6      6    4.000000
unique        NaN      2        4      6         NaN
top           NaN      F  student  alice         NaN
freq          NaN      3        3      1         NaN
mean    33.666667    NaN      NaN    NaN  172.750000
std     14.895189    NaN      NaN    NaN    6.344289
min     19.000000    NaN      NaN    NaN  165.000000
25%     23.000000    NaN      NaN    NaN  169.500000
50%     29.500000    NaN      NaN    NaN  173.000000
75%     41.250000    NaN      NaN    NaN  176.250000
max     58.000000    NaN      NaN    NaN  180.000000
       gender      job   name
count       6        6      6
unique      2        4      6
top         F  student  alice
freq        3        3      1


In [257]:
# Statistics per group (groupby)
print(df.groupby("job").mean())
print(df.groupby("job")["age"].mean())
print(df.groupby("job").describe(include='all'))

                 age      height
job                             
engineer   33.000000         NaN
manager    58.000000         NaN
scientist  44.000000  171.000000
student    22.333333  173.333333
job
engineer     33.000000
manager      58.000000
scientist    44.000000
student      22.333333
Name: age, dtype: float64
            age                                                               \
          count unique top freq       mean       std   min   25%   50%   75%   
job                                                                            
engineer    1.0    NaN NaN  NaN  33.000000       NaN  33.0  33.0  33.0  33.0   
manager     1.0    NaN NaN  NaN  58.000000       NaN  58.0  58.0  58.0  58.0   
scientist   1.0    NaN NaN  NaN  44.000000       NaN  44.0  44.0  44.0  44.0   
student     3.0    NaN NaN  NaN  22.333333  3.511885  19.0  20.5  22.0  24.0   

          ...    name                                                 
          ...  unique    top freq mean  std  min