# Pandas

Pandas is an open-source data manipulation and analysis library for the Python programming language. It provides data structures and functions needed to manipulate structured data seamlessly. The primary data structures in Pandas are:

* Series: A one-dimensional labeled array capable of holding any data type.
* DataFrame: A two-dimensional labeled data structure with columns of potentially different types.

Pandas is widely used for data cleaning, data transformation, data visualization, and statistical analysis. It is particularly powerful for working with large tabular datasets that fit in memory.

In [1]:
import pandas as pd

In [3]:
# Build a small example table: each dict key becomes a column,
# and each list supplies that column's values (one entry per row).
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}
df = pd.DataFrame(data=data)

# print() writes the plain-text rendering of the frame to stdout.
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [4]:
# A Series built from a plain list; no index is given, so the
# labels default to the positions 0, 1, 2, ...
obj = pd.Series(data=[4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [7]:
# Same values as before, but with explicit string labels
# in place of the default integer index.
obj2 = pd.Series(
    data=[4, 7, -5, 3],
    index=["dog", "b", "a", "c"],
)
obj2

dog    4
b      7
a     -5
c      3
dtype: int64

In [6]:
# Label-based lookup: fetch the value stored under the index label "a".
obj2["a"]

-5

In [8]:
# Multiplying by a scalar is applied element-wise and returns a new Series;
# the index labels are carried over and obj2 itself is not modified.
obj2 * 2

dog     8
b      14
a     -10
c       6
dtype: int64

In [9]:
# Overwrite the value stored under label "b". This mutates obj2 in place,
# so every later cell sees b == 6.
obj2.loc["b"] = 6
obj2

dog    4
b      6
a     -5
c      3
dtype: int64

In [10]:
# Boolean-mask filtering: `obj2 > 0` produces one True/False per entry,
# and indexing with it keeps only the entries whose value is positive.
obj2[obj2 > 0]

dog    4
b      6
c      3
dtype: int64

In [32]:
# Series-by-Series multiplication is element-wise; multiplying obj2 by itself
# squares every value while keeping the same index labels.
obj2 * obj2

dog    16
b      36
a      25
c       9
dtype: int64

In [11]:
# A dict maps directly onto a Series: the keys become the index labels
# and the values become the data.
sdata = {
    'ohio': 35000,
    'texas': 71000,
    'oregon': 16000,
    'utah': 5000,
}
obj3 = pd.Series(data=sdata)
obj3

ohio      35000
texas     71000
oregon    16000
utah       5000
dtype: int64

In [12]:
# Column-oriented construction: one dict key per column, one list entry per row.
# NOTE: this rebinds `data` and `df`, replacing the Name/Age/City example
# built earlier in the notebook.
data = {
    'state': ['ohio', 'ohio', 'ohio'],
    'year': [2000, 2001, 2002],
    'pop': [1.5, 1.7, 3.6],
}
df = pd.DataFrame(data=data)

In [35]:
# Display the whole frame (only three rows, so showing it in full is fine).
df

Unnamed: 0,state,year,pop
0,ohio,2000,1.5
1,ohio,2001,1.7
2,ohio,2002,3.6


In [13]:
# First two rows of the frame.
df.head(2)

Unnamed: 0,state,year,pop
0,ohio,2000,1.5
1,ohio,2001,1.7


In [14]:
# Last row of the frame.
df.tail(1)

Unnamed: 0,state,year,pop
2,ohio,2002,3.6


In [15]:
# Column labels, returned as a pandas Index object.
df.columns

Index(['state', 'year', 'pop'], dtype='object')

In [16]:
# iterrows() yields (index label, row-as-Series) pairs.
# Print only the first pair's row, then stop.
for label, record in df.iterrows():
    print(record)
    break

state    ohio
year     2000
pop       1.5
Name: 0, dtype: object


In [17]:
# Positional access: row 0 comes back as a Series keyed by column name.
df.iloc[0]

state    ohio
year     2000
pop       1.5
Name: 0, dtype: object

In [18]:
# len() of a DataFrame is its number of rows.
len(df)

3

In [19]:
# Walk the rows by integer position instead of iterrows();
# print the first row only, then stop.
for pos in range(len(df)):
    print(df.iloc[pos])
    break

state    ohio
year     2000
pop       1.5
Name: 0, dtype: object


In [20]:
# Row-by-row filter: print every row whose 'pop' value exceeds 1.6.
for _, row in df.iterrows():
    if not (row['pop'] > 1.6):
        continue  # skip rows that fail the threshold test
    print(row)

state    ohio
year     2001
pop       1.7
Name: 1, dtype: object
state    ohio
year     2002
pop       3.6
Name: 2, dtype: object


In [44]:
# Comparing a column with a scalar is vectorized: the result is a boolean
# Series with one True/False flag per row.
df['pop'] > 1.6

0    False
1     True
2     True
Name: pop, dtype: bool

In [45]:
# Index the frame with the boolean Series to keep only the rows flagged True;
# this selects the same rows as the iterrows() loop, without an explicit loop.
df[df['pop'] > 1.6]

Unnamed: 0,state,year,pop
1,ohio,2001,1.7
2,ohio,2002,3.6


In [46]:
# A plain list of booleans (one per row) works as a row mask as well.
df[[False, True, True]]

Unnamed: 0,state,year,pop
1,ohio,2001,1.7
2,ohio,2002,3.6


In [47]:
# Filtering returned new objects; df itself still holds all three rows.
df

Unnamed: 0,state,year,pop
0,ohio,2000,1.5
1,ohio,2001,1.7
2,ohio,2002,3.6
