
**Table of Contents**
<div id="toc"></div>




# Creating DataFrames

## Loading CSV files


In [4]:
import pandas 



In [5]:
import numpy as np

# Small demo frame used by the examples that follow: two string columns and
# one numeric column that contains a missing value (NaN).
df = pandas.DataFrame(
    data=dict(
        col1=['Item0', 'Item0', 'Item1', 'Item1'],
        col2=['Gold', 'Bronze', 'Gold', 'Silver'],
        col3=[1, 2, np.nan, 4],
    )
)


In [9]:
# Displays the first rows of the frame. head() shows 5 rows by default and
# accepts an optional int parameter (the number of rows to show). This frame
# has only 4 rows, so all of them are displayed.
df.head()





Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
1,Item0,Bronze,2.0
2,Item1,Gold,
3,Item1,Silver,4.0


In [10]:
# Similar to head, but displays the last rows (5 by default, optional int
# parameter). With only 4 rows the output is the whole frame again.
df.tail()

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
1,Item0,Bronze,2.0
2,Item1,Gold,
3,Item1,Silver,4.0


In [11]:



# The dimensions of the dataframe as a (rows, cols) tuple -- here (4, 3).
# shape is an attribute, not a method, so it is read without parentheses.
df.shape


(4, 3)

In [12]:
# The number of rows (not columns). Equal to df.shape[0]
len(df) 


4

In [13]:

# The column names. Returned as a pandas Index (see the output), not a plain array
df.columns 

Index(['col1', 'col2', 'col3'], dtype='object')

In [14]:


# Columns and their types, returned as a Series indexed by column name.
# The string columns are reported as object; col3 is float64 because it
# contains a NaN.
df.dtypes



col1     object
col2     object
col3    float64
dtype: object

In [15]:
# Returns the frame's data as a two-dimensional NumPy array; the index and
# column names are not part of the result. Because the columns have mixed
# types, the array has dtype=object.
# Note: the pandas documentation recommends df.to_numpy() for this in newer versions.
df.values 

array([['Item0', 'Gold', 1.0],
       ['Item0', 'Bronze', 2.0],
       ['Item1', 'Gold', nan],
       ['Item1', 'Silver', 4.0]], dtype=object)

In [16]:
# Displays descriptive stats. By default only numeric columns are summarised,
# so just col3 appears in the output; NaN values are skipped (count is 3, not 4).
# Pass include='all' to cover the object (string) columns as well.
df.describe()

Unnamed: 0,col3
count,3.0
mean,2.333333
std,1.527525
min,1.0
25%,1.5
50%,2.0
75%,3.0
max,4.0


In [6]:
# Sort rows by their index labels (axis=0) in descending order.
# This returns a new, sorted frame; df itself is left unchanged.
df.sort_index(axis=0, ascending=False)


Unnamed: 0,col1,col2,col3
3,Item1,Silver,4.0
2,Item1,Gold,
1,Item0,Bronze,2.0
0,Item0,Gold,1.0


In [7]:
# Only the value of the last expression in a cell is displayed, so the first
# two selections below produce no visible output.

# Selects only the column named 'col1' using attribute access (returns a Series)
df.col1 

# Same as previous, using bracket notation; this form also works for column
# names that are not valid Python identifiers
df['col1'] 

# Select two columns by passing a list of names (returns a DataFrame)
df[['col1', 'col2']]

Unnamed: 0,col1,col2
0,Item0,Gold
1,Item0,Bronze
2,Item1,Gold
3,Item1,Silver


In [8]:
# Produces a boolean Series (one True/False per row), not a single value!
# A comparison with NaN evaluates to False, which is why row 2 is False.
df.col3 > 0

0     True
1     True
2    False
3     True
Name: col3, dtype: bool

In [9]:
# Query by a single column value: indexing with the boolean mask keeps only
# the rows where it is True, so the row with NaN in col3 is dropped
df[df.col3 > 0] 

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
1,Item0,Bronze,2.0
3,Item1,Silver,4.0


In [12]:
# Conditions are combined with the element-wise operators & and | (not the
# Python keywords `and` / `or`). Each comparison is parenthesised because
# & and | bind more tightly than > and ==.
# Only the second query's result is displayed (last expression in the cell).

# A conjunction query using two columns (both conditions must hold)
df[(df['col3'] > 0) & (df['col2'] == 'Silver')] 

# A disjunction query using two columns (at least one condition must hold)
df[(df['col3'] > 0) | (df['col2'] == 'Silver')]

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
1,Item0,Bronze,2.0
3,Item1,Silver,4.0
