'''
What is Pandas?
Pandas is a Python library used for working with data sets.

It has functions for analyzing, cleaning, exploring, and manipulating data.

Why Use Pandas?
Pandas allows us to analyze big data and make conclusions based on statistical theories.

Pandas can clean messy data sets, and make them readable and relevant.

Relevant data is very important in data science.
'''

In [1]:
# !pip install pandas
import pandas as pd 
import numpy as np
# A pandas Series is a one-dimensional array; here every value gets an explicit label.
s = pd.Series(
    data=[1, 5, 25, 5, 5, 4, 6],
    index=list("abcdefg"),
)
s

a     1
b     5
c    25
d     5
e     5
f     4
g     6
dtype: int64

In [2]:
# !pip install pandas
import pandas as pd
import numpy as np
'''
What is a Series?
A Pandas Series is like a column in a table.

It is a one-dimensional array holding data of any type.
'''
# No index is passed, so the values get the default integer labels 0..4
# (visible in the printed output).
s1 = pd.Series([23,24,25,12,32])
print(s1)

0    23
1    24
2    25
3    12
4    32
dtype: int64


In [3]:
# dtype='int' asks for the platform's default integer type, so the exact
# width (int32 vs int64) depends on the OS / NumPy build.
s2 = pd.Series(
    data=[23, 45, 67, 12, 34],
    index=list("abcde"),
    dtype="int",
)
s2

a    23
b    45
c    67
d    12
e    34
dtype: int32

In [4]:
# Same values stored as floats; a label slice includes BOTH end labels.
s3 = pd.Series(
    data=[23, 45, 67, 12, 34],
    index=list("abcde"),
    dtype="float",
)
s3.loc['a':'d']

a    23.0
b    45.0
c    67.0
d    12.0
dtype: float64

In [5]:
'''
Labels
If nothing else is specified, the values are labeled with their index number. First value has index 0, second value has index 1 etc.

This label can be used to access a specified value.'''
# s3 was built with the custom labels 'a'..'e', so 'a' selects its first value.
s3['a']

23.0

# creation of Series using dictionary

In [6]:
# Dictionary keys become the index labels; the mixed value types give dtype object.
s5 = pd.Series(dict(name='vineeth reddy', age=25))
s5

name    vineeth reddy
age                25
dtype: object

In [7]:
# Keys 'a' and 'b' become the labels of the two integer values.
s4 = pd.Series(dict(a=65, b=43))
s4

a    65
b    43
dtype: int64

# DataFrame
What is a DataFrame?
A Pandas DataFrame is a two-dimensional data structure, like a two-dimensional array or a table with rows and columns.

In [8]:
# One inner list = one row, so this nested list yields a 1x4 DataFrame.
d1 = pd.DataFrame(data=[[43, 54, 65, 76]])
d1

Unnamed: 0,0,1,2,3
0,43,54,65,76


In [9]:
# Each inner list is one row, giving a 3x3 DataFrame with default labels.
d2 = pd.DataFrame(
    data=[
        [2, 3, 4],
        [4, 5, 6],
        [1, 2, 3],
    ]
)
d2

Unnamed: 0,0,1,2
0,2,3,4
1,4,5,6
2,1,2,3


In [10]:
# NOTE: this cell rebinds both `s` and `d2`, replacing the objects created in earlier cells.
s=pd.Series([1,5,4,6,8,5])  # a Series is a one-dimensional array
d2 = pd.DataFrame(s)               # wrap the Series as a single-column DataFrame
d2

Unnamed: 0,0
0,1
1,5
2,4
3,6
4,8
5,5


In [11]:
# Name the columns; the rows keep their default integer labels.
d3 = pd.DataFrame(
    data=[[2, 3, 4], [4, 5, 6], [1, 2, 3]],
    columns=list("abc"),
)
d3

Unnamed: 0,a,b,c
0,2,3,4
1,4,5,6
2,1,2,3


In [12]:
# Label both axes: `columns` names the columns and `index` names the rows.
d3 = pd.DataFrame(
    data=[[2, 3, 4], [4, 5, 6], [1, 2, 3]],
    index=list("xyz"),
    columns=list("abc"),
)
d3

Unnamed: 0,a,b,c
x,2,3,4
y,4,5,6
z,1,2,3


### creating DataFrame from list of dictionaries

In [13]:
# Each dict is one row; the union of all keys forms the columns, and keys
# missing from a row show up as NaN in the resulting table.
dic = [
    dict(alex=1, joe=2),
    dict(ema=5, dora=10, alice=20),
    dict(vineeth=25),
]
pd.DataFrame(data=dic, index=list("abc"))

Unnamed: 0,alex,joe,ema,dora,alice,vineeth
a,1.0,2.0,,,,
b,,,5.0,10.0,20.0,
c,,,,,,25.0


### DataFrame operations

In [14]:
d3

Unnamed: 0,a,b,c
x,2,3,4
y,4,5,6
z,1,2,3


In [15]:
d3['a']    #column indexing


x    2
y    4
z    1
Name: a, dtype: int64

In [16]:
d3['d']=d3['a']*d3['b']    # add a new column 'd' holding the element-wise product of 'a' and 'b'
d3

Unnamed: 0,a,b,c,d
x,2,3,4,6
y,4,5,6,20
z,1,2,3,2


In [17]:
pop = d3.pop('a') # pop removes column 'a' from d3 and returns it (kept here in the variable `pop`)
d3

Unnamed: 0,b,c,d
x,3,4,6
y,5,6,20
z,2,3,2


In [18]:
d3

Unnamed: 0,b,c,d
x,3,4,6
y,5,6,20
z,2,3,2


In [19]:
d3.insert(1,'name',d3['b'])   # insert the values of column 'b' as a new column 'name' at position 1 (0-based, i.e. second column)
d3

Unnamed: 0,b,name,c,d
x,3,3,4,6
y,5,5,6,20
z,2,2,3,2


In [23]:
# Build a 10-row DataFrame of random integers, one column per dictionary key.
# randint's upper bound is exclusive, so 'uday' (drawn from 3..4) is always 3.
d4 = pd.DataFrame({'vineeth':np.random.randint(2,50,size = (10)),
                   'kiran':np.random.randint(4,10,size = (10)),
                   'uday':np.random.randint(3,4,size = (10))})

In [24]:
d4.head()

Unnamed: 0,vineeth,kiran,uday
0,33,5,3
1,45,9,3
2,33,6,3
3,23,4,3
4,18,6,3


In [25]:
d4.tail()

Unnamed: 0,vineeth,kiran,uday
5,6,7,3
6,14,9,3
7,5,5,3
8,23,9,3
9,7,6,3


In [26]:
'''
Info About the Data
The DataFrames object has a method called info(), that gives you more information about the data set.'''
d4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   vineeth  10 non-null     int32
 1   kiran    10 non-null     int32
 2   uday     10 non-null     int32
dtypes: int32(3)
memory usage: 248.0 bytes


In [27]:
d4

Unnamed: 0,vineeth,kiran,uday
0,33,5,3
1,45,9,3
2,33,6,3
3,23,4,3
4,18,6,3
5,6,7,3
6,14,9,3
7,5,5,3
8,23,9,3
9,7,6,3


In [33]:
d4.loc[9,'uday']

3

In [29]:
# .loc selects by label: the row slice 4:9 includes BOTH end labels (rows 4 through 9).
d4.loc[4:9,['vineeth','uday']]

Unnamed: 0,vineeth,uday
4,18,3
5,6,3
6,14,3
7,5,3
8,23,3
9,7,3


In [30]:
# Pick the rows labelled 3, 4 and 7 and the columns 'vineeth' and 'kiran'.
# The result is stored in a plain variable that happens to be called `loc`;
# it is unrelated to the DataFrame.loc accessor used on the right-hand side.
loc=d4.loc[[3,4,7],['vineeth','kiran']]
loc

Unnamed: 0,vineeth,kiran
3,23,4
4,18,6
7,5,5


In [32]:
d4.iloc[9,1]  #[row,column]

6

In [34]:
# .iloc selects by position: the row slice 2:7 excludes the stop, so rows 2..6
# are returned, together with the columns at positions 0 and 1.
iloc=d4.iloc[2:7,[0,1]]
iloc

Unnamed: 0,vineeth,kiran
2,33,6
3,23,4
4,18,6
5,6,7
6,14,9


In [35]:
d4.iloc[2:7,[0,1,2]]

Unnamed: 0,vineeth,kiran,uday
2,33,6,3
3,23,4,3
4,18,6,3
5,6,7,3
6,14,9,3


In [38]:
d4.values

array([[33,  5,  3],
       [45,  9,  3],
       [33,  6,  3],
       [23,  4,  3],
       [18,  6,  3],
       [ 6,  7,  3],
       [14,  9,  3],
       [ 5,  5,  3],
       [23,  9,  3],
       [ 7,  6,  3]])

In [42]:
d4.kiran.values

array([5, 9, 6, 4, 6, 7, 9, 5, 9, 6])

In [43]:
# Row-wise total of the three columns, stored as a new 'sum' column.
d4['sum'] = d4['vineeth'] + d4['kiran'] + d4['uday']
d4

Unnamed: 0,vineeth,kiran,uday,sum
0,33,5,3,41
1,45,9,3,57
2,33,6,3,42
3,23,4,3,30
4,18,6,3,27
5,6,7,3,16
6,14,9,3,26
7,5,5,3,13
8,23,9,3,35
9,7,6,3,16


In [45]:
# Build a small employee table from a list of rows.
a = [
    ['VineethReddy', '1a', 35000],
    ['Kiran', '1b', 20000],
    ['krishna', '1c', 15000],
]  # one inner list per employee: name, id, salary
dfl = pd.DataFrame(
    data=a,
    index=['x', 'y', 'z'],             # row labels
    columns=['name', 'id', 'salary'],  # column names
)
dfl

Unnamed: 0,name,id,salary
x,VineethReddy,1a,35000
y,Kiran,1b,20000
z,krishna,1c,15000


In [57]:
# Boolean mask: keep only the rows whose salary is below 30000.
# NOTE(review): the recorded output shows row labels 1 and 2 rather than 'y' and 'z',
# so it presumably came from a run in which dfl had already been re-indexed — confirm.
y=dfl[dfl.salary<30000]
y

Unnamed: 0,name,id,salary
1,Kiran,1b,20000
2,krishna,1c,15000


In [58]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported replacement. The new row is wrapped in a one-row DataFrame
# and ignore_index=True renumbers the rows 0..n-1.
# The result is only displayed here: it is not assigned, so dfl is unchanged.
pd.concat(
    [dfl, pd.DataFrame([{'name': 'rakesh', 'id': '1d', 'salary': 30000}])],
    ignore_index=True,
)

Unnamed: 0,name,id,salary
0,VineethReddy,1a,35000
1,Kiran,1b,20000
2,krishna,1c,15000
3,,1b,30000
4,,1b,30000
5,rakesh,1d,30000


In [59]:
# Add a row whose name is missing (NaN) and rebind dfl to the result.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the row
# is wrapped in a one-row DataFrame and combined with pd.concat instead.
# Each run of this cell adds one more such row.
dfl = pd.concat(
    [dfl, pd.DataFrame([{'name': np.nan, 'id': '1b', 'salary': 30000}])],
    ignore_index=True,
)
dfl

Unnamed: 0,name,id,salary
0,VineethReddy,1a,35000
1,Kiran,1b,20000
2,krishna,1c,15000
3,,1b,30000
4,,1b,30000
5,,1b,30000


In [60]:
dfl.isna()

Unnamed: 0,name,id,salary
0,False,False,False
1,False,False,False
2,False,False,False
3,True,False,False
4,True,False,False
5,True,False,False


In [61]:
dfl.isnull().sum()

name      3
id        0
salary    0
dtype: int64

In [62]:
# dropna() returns a new frame without the rows that contain NaN.
# Careful: `df1` (digit one) is a different variable from `dfl` (letter l).
df1=dfl.dropna()
df1

Unnamed: 0,name,id,salary
0,VineethReddy,1a,35000
1,Kiran,1b,20000
2,krishna,1c,15000


In [63]:
dfl

Unnamed: 0,name,id,salary
0,VineethReddy,1a,35000
1,Kiran,1b,20000
2,krishna,1c,15000
3,,1b,30000
4,,1b,30000
5,,1b,30000


In [70]:
# Replace every missing value with 'vinay'; the result is displayed but not assigned.
dfl.fillna(value = 'vinay')

Unnamed: 0,name,id,salary
0,VineethReddy,1a,35000
1,Kiran,1b,20000
2,krishna,1c,15000
3,vinay,1b,30000
4,vinay,1b,30000
5,vinay,1b,30000


### groupby

In [72]:
# Two speed observations per animal, used for the groupby example.
df = pd.DataFrame(
    {
        'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
        'Max Speed': [380.0, 370.0, 24.0, 26.0],
    }
)
df

Unnamed: 0,Animal,Max Speed
0,Falcon,380.0
1,Falcon,370.0
2,Parrot,24.0
3,Parrot,26.0


In [73]:
# Average of the numeric column(s) for each distinct value of 'Animal'.
df.groupby('Animal').mean()

Unnamed: 0_level_0,Max Speed
Animal,Unnamed: 1_level_1
Falcon,375.0
Parrot,25.0


In [None]:

'''
Finding Relationships
A great aspect of the Pandas module is the corr() method.

The corr() method calculates the relationship between each column in your data set.
'''

In [75]:
# Pairwise correlation of the numeric columns only. Since pandas 2.0, corr()
# no longer drops non-numeric columns silently, so without numeric_only=True
# the string column 'Animal' makes the call raise.
df.corr(numeric_only=True)

Unnamed: 0,Max Speed
Max Speed,1.0
