In [1]:
import pandas as pd
import numpy as np

In [2]:
# Series:
# A pandas object for one-dimensional data. It pairs two aligned arrays:
# the index (labels) and the data (values).
s = pd.Series(data=[12, -4, 7, 9])  # data only; the index is generated automatically
s

0    12
1    -4
2     7
3     9
dtype: int64

In [3]:
# Build the same Series again, this time supplying explicit labels.
s = pd.Series(data=[12, -4, 7, 9], index=list("abcd"))
s

a    12
b    -4
c     7
d     9
dtype: int64

In [6]:
# Show the two components of the Series: first its labels, then its raw values.
print(s.index, end=" \n\n")
print(s.values)

Index(['a', 'b', 'c', 'd'], dtype='object') 

[12 -4  7  9]


In [20]:
# Selecting internal elements

# Positional access goes through .iloc. Writing s[2] on a label-indexed
# Series relies on a deprecated "integer key means position" fallback.
print(s.iloc[2])
print(s["c"])  # the same element, selected by its label
print()

# Walk the Series by label.
for label in s.index:
    print(s[label])
print()

# Walk the Series by position; len(s) avoids hard-coding the element count.
for pos in range(len(s)):
    print(s.iloc[pos])

7
7

12
-4
7
9

12
-4
7
9


In [22]:
# Slicing
print(s.iloc[0:2], end=" \n\n")  # by position: the stop position is excluded
print(s.loc["a":"b"])            # by label: both endpoints are included

a    12
b    -4
dtype: int64 

a    12
b    -4
dtype: int64


In [23]:
# Assigning values to Series elements
# NOTE: this mutates `s` in place, so re-running earlier cells that display
# `s` will show the updated values.
s.iloc[0] = 999    # assign by position (.iloc avoids the deprecated s[0] fallback)
s.loc["b"] = 9999  # assign by label
print(s)

a     999
b    9999
c       7
d       9
dtype: int64


In [28]:
arr = np.array([1, 2, 3, 4])  # NumPy array

# Build the Series on top of the array's memory. copy=False makes the sharing
# explicit instead of relying on the constructor's default, which newer pandas
# versions (Copy-on-Write) change to copying the array.
s3 = pd.Series(arr, copy=False)

# (The original cell had a bare `s3` here; only the last expression of a cell
# is displayed, so it had no effect and has been dropped.)

arr[3] = 555
s3  # s3 references the array, so the change made to `arr` is visible in the Series

0      1
1      2
2      3
3    555
dtype: int64

In [27]:
s4 = pd.Series(s)  # a Series can be created from an existing Series
s4                 # display the new Series (the original cell displayed `s` instead)

a     999
b    9999
c       7
d       9
dtype: int64

In [31]:
# Filtering
is_large = s > 8           # boolean Series: True where the value exceeds 8
print(is_large, end=" \n\n")
print(s[is_large])         # keep only the elements where the mask is True

a     True
b     True
c    False
d     True
dtype: bool 

a     999
b    9999
d       9
dtype: int64


In [33]:
# Mathematical functions
s = pd.Series(data=[1, 4, 9, 16, 25, 36, 49, 64, 81, 100])
print(s.div(2), end=" \n\n")  # element-wise division
print(np.log(s))              # NumPy natural log applied to every element


0     0.5
1     2.0
2     4.5
3     8.0
4    12.5
5    18.0
6    24.5
7    32.0
8    40.5
9    50.0
dtype: float64 

0    0.000000
1    1.386294
2    2.197225
3    2.772589
4    3.218876
5    3.583519
6    3.891820
7    4.158883
8    4.394449
9    4.605170
dtype: float64


In [35]:
# Evaluating values
# Note that the index labels are allowed to repeat.
serd = pd.Series(
    data=[1, 0, 2, 1, 2, 3],
    index=["white", "white", "blue", "green", "green", "yellow"],
)

print(serd.unique())  # the distinct values
print()
serd.value_counts()   # each distinct value together with how often it occurs

[1 0 2 3]



2    2
1    2
3    1
0    1
dtype: int64

In [39]:
# Membership function: isin
print(serd.isin(values=[0, 3]))  # True where the element equals 0 or 3
# print(serd.isin([0:3]))  -- range-like slice input does not work here

white     False
white      True
blue      False
green     False
green     False
yellow     True
dtype: bool


In [41]:
# NaN can be added as an entry
# Use the lowercase np.nan: the np.NaN alias was removed in NumPy 2.0.
s2 = pd.Series([5, -4, np.nan, 14])  # NaN is accepted; the values are stored as float64
s2

0     5.0
1    -4.0
2     NaN
3    14.0
dtype: float64

In [42]:
missing = s2.isnull()   # True at the positions that hold no value
present = s2.notnull()  # True at the positions that do hold a value
print(missing)
print(present)

0    False
1    False
2     True
3    False
dtype: bool
0     True
1     True
2    False
3     True
dtype: bool


In [44]:
# isnull() and notnull() return boolean masks that can be used for filtering
print(s2[s2.isnull()], end=" \n\n")  # only the missing entries
print(s2[s2.notnull()])              # only the entries that have a value

2   NaN
dtype: float64 

0     5.0
1    -4.0
3    14.0
dtype: float64


In [47]:
# A Series can be built from a dictionary
mydict = dict(red=2000, blue=1000, yellow=500, orange=1000)
myseries = pd.Series(data=mydict)
print(myseries.values)  # the dictionary values become the data
print(myseries.index)   # the dictionary keys become the index

[1000 1000 2000  500]
Index(['blue', 'orange', 'red', 'yellow'], dtype='object')


In [50]:
# Labels that mostly differ from the dictionary keys (note "Red" vs "red")
colors = "green Red blue grey pink".split()
myseries = pd.Series(data=mydict, index=colors)
print(myseries)  # only "blue" has a matching key in the dict

# Labels that match every dictionary key, plus one extra ("pink")
colors = "red blue yellow orange pink".split()
myseries = pd.Series(data=mydict, index=colors)
myseries  # every label except "pink" finds a key/value pair in the dict

green       NaN
Red         NaN
blue     1000.0
grey        NaN
pink        NaN
dtype: float64


red       2000.0
blue      1000.0
yellow     500.0
orange    1000.0
pink         NaN
dtype: float64

In [51]:
# Operations between Series
mydict = dict(red=2000, blue=1000, yellow=500, orange=1000)
myseries = pd.Series(data=mydict)

mydict2 = dict(red=2000, yellow=500, black=700)
myseries2 = pd.Series(data=mydict2)

# Values are paired up by index label, so the ordering inside each Series
# does not matter.
myseries.add(myseries2)

black        NaN
blue         NaN
orange       NaN
red       4000.0
yellow    1000.0
dtype: float64

In [None]:
#Selecting elements