# Ch3.0 Introducing Pandas Objects

In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
# Build the CSV path from a single data directory so the location is easy to change.
# (`os` is imported in the notebook's imports cell.)
dir_data = './data/'
f_app = os.path.join(dir_data, 'data2.csv')
print('Path of read in data: %s' % (f_app))
# NOTE(review): the recorded run of this cell ended in a MemoryError while
# loading the whole file. If that recurs, consider passing `nrows=` or
# `chunksize=` to read_csv -- confirm first what later cells need from data2.
data2 = pd.read_csv(f_app)

Path of read in data: ./data/data2.csv


  interactivity=interactivity, compiler=compiler, result=result)


MemoryError: 

## The Pandas Series Object

In [2]:
# Build a float Series from a plain list; no index is given, so pandas
# attaches the default integer one.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [3]:
# The `values` attribute exposes the Series data as a NumPy array.
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [4]:
# The `index` attribute holds the labels; here it is the default RangeIndex.
data.index

RangeIndex(start=0, stop=4, step=1)

In [None]:
# Look up a single element by its index key using square-bracket notation.
data[1]

In [None]:
# Square-bracket slicing returns a sub-Series rather than a single scalar.
data[1:3]

### Constructing Series objects

In [6]:
# With no explicit index, a default integer index starting at 0 is assigned.
pd.Series([2, 4, 6])

0    2
1    4
2    6
dtype: int64

``data`` can be a scalar, which is repeated to fill the specified index:

In [7]:
# The scalar 5 is repeated once for every label in the supplied index.
pd.Series(5, index=[100, 200, 300])

100    5
200    5
300    5
dtype: int64

``data`` can be a dictionary, in which case the dictionary keys become the index:

In [5]:
# The dictionary keys become the index labels (shown here in insertion order).
pd.Series({2:'a', 1:'b', 3:'c'})

2    a
1    b
3    c
dtype: object

## The Pandas DataFrame Object

### Constructing DataFrame objects

#### From a single Series object

In [8]:
# State populations keyed by state name; the Series takes its index from the keys.
population_dict = dict([
    ('California', 38332521),
    ('Texas', 26448193),
    ('New York', 19651127),
    ('Florida', 19552860),
    ('Illinois', 12882135),
])
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [9]:
# Wrap the Series in a one-column DataFrame; `columns` supplies the column name.
pd.DataFrame(population, columns=['population'])

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [14]:
# Map each label to a count, wrap the resulting Series in a single-column
# DataFrame, and print its plain-text form.
pop = {
    'acardo': 28977639,
    'dwsskkj': 29738987,
    'dewafhohu': 29766899,
    'dhewiufga': 2909089808,
}
pop2 = pd.DataFrame(pd.Series(pop), columns=['doisfjio'])
print(pop2)

             doisfjio
acardo       28977639
dwsskkj      29738987
dewafhohu    29766899
dhewiufga  2909089808


#### From a list of dicts

In [15]:
# One dict per row; the dict keys become the DataFrame's column names.
data = [dict(a=i, b=2 * i) for i in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


#### From a dictionary of Series objects

In [16]:
# Population per state, used as one of the columns of the combined DataFrame.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [17]:
# Area per state, keyed by the same state names as the population Series.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
area = pd.Series(data=area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [18]:
# Each Series becomes one named column; rows are keyed by the shared state index.
pd.DataFrame(dict(population=population, area=area))

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


#### From a two-dimensional NumPy array

In [None]:
# Draw the values from a locally seeded generator so the table is identical on
# every Restart & Run All, without touching NumPy's global random state.
pd.DataFrame(np.random.RandomState(0).rand(3, 2),
             columns=['foo', 'bar'],
             index=['a', 'b', 'c'])

#### From files

In [19]:
# Load the microbiome table; the first line of the file supplies the column names.
mb = pd.read_csv(filepath_or_buffer="./data/microbiome.csv")
mb

Unnamed: 0,Taxon,Patient,Group,Tissue,Stool
0,Firmicutes,1,0,136,4182
1,Firmicutes,2,1,1174,703
2,Firmicutes,3,0,408,3946
3,Firmicutes,4,1,831,8605
4,Firmicutes,5,0,693,50
5,Firmicutes,6,1,718,717
6,Firmicutes,7,0,173,33
7,Firmicutes,8,1,228,80
8,Firmicutes,9,0,162,3196
9,Firmicutes,10,1,372,32


In [20]:
# header=None treats the file's first line as data, so the columns get integer
# labels instead of names; .head() limits the display to the first rows.
pd.read_csv("./data/microbiome.csv", header=None).head()

Unnamed: 0,0,1,2,3,4
0,Taxon,Patient,Group,Tissue,Stool
1,Firmicutes,1,0,136,4182
2,Firmicutes,2,1,1174,703
3,Firmicutes,3,0,408,3946
4,Firmicutes,4,1,831,8605


In [22]:
# Same file loaded through read_table, with the comma delimiter given explicitly.
# NOTE(review): the recorded output carries a stray warning fragment -- possibly
# a read_table deprecation notice from an older pandas; confirm on the target version.
mb = pd.read_table(filepath_or_buffer="./data/microbiome.csv", sep=',')
mb

  """Entry point for launching an IPython kernel.


Unnamed: 0,Taxon,Patient,Group,Tissue,Stool
0,Firmicutes,1,0,136,4182
1,Firmicutes,2,1,1174,703
2,Firmicutes,3,0,408,3946
3,Firmicutes,4,1,831,8605
4,Firmicutes,5,0,693,50
5,Firmicutes,6,1,718,717
6,Firmicutes,7,0,173,33
7,Firmicutes,8,1,228,80
8,Firmicutes,9,0,162,3196
9,Firmicutes,10,1,372,32


## The Pandas Index Object

In [23]:
# Construct an Index directly from a list of integers.
ind = pd.Index(data=[2, 3, 5, 7, 11])
ind

Int64Index([2, 3, 5, 7, 11], dtype='int64')

### Index as immutable array

In [None]:
# An Index supports array-style element access with square brackets.
ind[1]

In [None]:
# Slicing with a step works as on an array: take every second entry.
ind[::2]

In [None]:
# Print the Index's array-style attributes: size, shape, number of dimensions, dtype.
print(ind.size, ind.shape, ind.ndim, ind.dtype)

In [None]:
# Index objects are immutable: item assignment raises TypeError. Catch it so the
# demonstration shows the error message without halting a Restart & Run All.
try:
    ind[1] = 0
except TypeError as err:
    print('TypeError:', err)