In [1]:
import pandas as pd

### Pandas Series
- A Pandas Series is a one-dimensional labeled array that holds data of any type (integer, string, float, etc.). It is similar to a column in a DataFrame or a list in Python, but with more capabilities. Below are detailed notes on the Series object.

#### Create a series from list

In [2]:
s = pd.Series([3,4,5,6])

In [3]:
s

0    3
1    4
2    5
3    6
dtype: int64

#### Create series from dictionary

In [4]:
# Named `data` rather than `dict` so the built-in `dict` type is not shadowed
# for the rest of the kernel session.
data = {'a':5,'b':7,'c':85}
# The dictionary keys become the index labels and the values become the data.
d = pd.Series(data)

In [6]:
d

a     5
b     7
c    85
dtype: int64

### Indexing in Series:
- Series are indexed like arrays but can have custom labels for indices.
- **Default Indexing:** If no index is specified, Pandas automatically assigns a numerical index starting from 0.
- **Custom Indexing:** You can specify custom index labels.

In [7]:
series = pd.Series([7,8,45,78]) # No index is given, so pandas automatically assigns a numerical index starting at 0
series

0     7
1     8
2    45
3    78
dtype: int64

In [8]:
# Same constructor, but each value is now addressed by a custom label ('a'..'e')
# instead of the default 0..4 positions.
series = pd.Series([6, 7, 4, 6, 8], index=list('abcde'))
series

a    6
b    7
c    4
d    6
e    8
dtype: int64

### Data Access: 
- Data in a Series can be accessed using either integer positions or index labels.

#### Access by Position

In [9]:
# Use .iloc for positional access: a plain `series[1]` on a label-indexed
# Series relies on a deprecated integer fallback and emits a FutureWarning.
series.iloc[1]

  series[1]


7

#### Access by integer location (`.iloc`)

In [10]:
series.iloc[1]

7

#### Access by label

In [11]:
series['a']

6

### Slicing:

In [12]:
series[1:3]  # integer slice is positional; the stop position (3) is excluded

b    7
c    4
dtype: int64

In [13]:
series['a':'c']  # label slice; unlike a positional slice, the stop label 'c' is included

a    6
b    7
c    4
dtype: int64

### Vectorized Operations:
- Series support vectorized operations, meaning you can perform element-wise operations without using loops.

In [14]:
series

a    6
b    7
c    4
d    6
e    8
dtype: int64

In [15]:
series * 2

a    12
b    14
c     8
d    12
e    16
dtype: int64

### Basic statistical operations

In [16]:
series.isna().sum()

0

In [17]:
s

0    3
1    4
2    5
3    6
dtype: int64

In [18]:
s.count()

4

In [19]:
s.sum()

18

In [20]:
s.mean()

4.5

### Element-wise Operations:
- You can apply functions to each element of a Series using .apply()

####  Apply Functions with apply() and map():

In [21]:
s = s.apply(lambda x:x**2)  # rebinds s to its element-wise squares; re-running this cell squares the values again

In [22]:
s

0     9
1    16
2    25
3    36
dtype: int64

In [23]:
def even(n):
    """Return ``n`` unchanged when it is even; return ``None`` for odd values.

    When used with ``Series.apply``, the ``None`` results appear as NaN in the
    resulting Series.
    """
    return n if n % 2 == 0 else None

In [24]:
s.apply(even)

0     NaN
1    16.0
2     NaN
3    36.0
dtype: float64

In [25]:
# even() yields a value only for even entries (odd ones become NaN), so the
# not-NaN mask selects exactly the even elements of the original Series.
result = s.loc[s.apply(even).notna()]
result

1    16
3    36
dtype: int64

In [26]:
s

0     9
1    16
2    25
3    36
dtype: int64

In [27]:
s.map(lambda x: x * 2)

0    18
1    32
2    50
3    72
dtype: int64

### Check data types in series

In [28]:
s.dtypes

dtype('int64')

In [29]:
s.sort_values() 

0     9
1    16
2    25
3    36
dtype: int64

In [30]:
s.sort_index()

0     9
1    16
2    25
3    36
dtype: int64

In [31]:
s[s % 2 == 0] 

1    16
3    36
dtype: int64

In [32]:
s = s[s%2==0]

In [33]:
s

1    16
3    36
dtype: int64

#### Name attribute in series
- The `name` attribute assigns a name to a Series, making it easier to identify later.

In [34]:
s = pd.Series([1, 2, 3], name="Numbers")

In [35]:
s

0    1
1    2
2    3
Name: Numbers, dtype: int64

In [36]:
s.name

'Numbers'

In [37]:
s.name = 'My Series'

In [38]:
s.name

'My Series'

In [39]:
s

0    1
1    2
2    3
Name: My Series, dtype: int64

### Changing the Data Type:

In [40]:
s.dtypes

dtype('int64')

In [41]:
s = s.astype(float)

In [42]:
s.dtypes

dtype('float64')

In [43]:
s

0    1.0
1    2.0
2    3.0
Name: My Series, dtype: float64

### Value Counts
- It counts the occurrences of each unique value in the Series. Useful for exploring frequency distributions or understanding the composition of data.

In [44]:
s = pd.Series([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
s.value_counts()

4    4
3    3
2    2
1    1
Name: count, dtype: int64

### Reindex

In [45]:
s.index

RangeIndex(start=0, stop=10, step=1)

In [46]:
s

0    1
1    2
2    2
3    3
4    3
5    3
6    4
7    4
8    4
9    4
dtype: int64

In [47]:
s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])

In [48]:
s.index

Index(['a', 'b', 'c'], dtype='object')

In [49]:
s = s.reindex(['a', 'b', 'd'])  # conform to the new labels: 'c' is dropped; 'd' did not exist, so it is added with NaN

In [50]:
s.index

Index(['a', 'b', 'd'], dtype='object')

### String Operations:
- To perform string operations on a Series, go through its `.str` accessor.

#### Changing case (upper, lower, etc.)

In [51]:
s = pd.Series(['a', 'b', 'c'])
s.str.upper()

0    A
1    B
2    C
dtype: object

In [52]:
s.str.lower()

0    a
1    b
2    c
dtype: object

### Split

In [53]:
s = pd.Series(['a-b', 'c-d', 'e-f'])
s.str.split('-')

0    [a, b]
1    [c, d]
2    [e, f]
dtype: object

In [54]:
s

0    a-b
1    c-d
2    e-f
dtype: object

In [55]:
s.dtypes

dtype('O')

In [56]:
s = s.str.split('-')

In [57]:
s.dtypes

dtype('O')

In [58]:
s

0    [a, b]
1    [c, d]
2    [e, f]
dtype: object

### Convert series into Data Frame

In [59]:
s1 = pd.Series([1, 2, 3], name="A")
s2 = pd.Series([4, 5, 6], name="B")

In [65]:
# Place the two Series side by side; the column labels are taken from the
# Series names ("A" and "B").
df = pd.concat([s1, s2], axis=1)

In [66]:
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [68]:
row = [66,78]
df.iloc[1] = row

In [69]:
df

Unnamed: 0,A,B
0,1,4
1,66,78
2,3,6


In [72]:
df.loc[1] = [6,7]

In [73]:
df

Unnamed: 0,A,B
0,1,4
1,6,7
2,3,6


In [76]:
# New row data (as a DataFrame)
new_row = pd.DataFrame({'A': [7], 'B': [8]})

# Add the new row using pd.concat() to avoid overwriting
df = pd.concat([df, new_row], ignore_index=True)

In [77]:
df

Unnamed: 0,A,B
0,1,4
1,6,7
2,3,6
3,7,8


In [78]:
# Insert new_row at positional index 1, i.e. between the current rows 0 and 1.
# Note: this cell rebinds df, so re-running it inserts the row again.
position = 1

# Split the DataFrame by position (iloc slicing excludes the stop position)
df1 = df.iloc[:position]  # Rows before the insertion point
df2 = df.iloc[position:]  # Rows from the insertion point onwards

# Stitch the pieces back together; ignore_index=True renumbers the rows from 0
df = pd.concat([df1, new_row, df2], ignore_index=True)

In [79]:
df

Unnamed: 0,A,B
0,1,4
1,7,8
2,6,7
3,3,6
4,7,8


In [80]:
# Alternative insertion route: round-trip through a plain list of row lists.
# to_numpy() is the accessor the pandas documentation recommends over .values.
df_list = df.to_numpy().tolist()

# list.insert places the new row at position 1 and shifts the later rows down
df_list.insert(1, [17, 28])

# Rebuild the DataFrame, reusing the original column labels
df = pd.DataFrame(df_list, columns=df.columns)


In [81]:
df

Unnamed: 0,A,B
0,1,4
1,17,28
2,7,8
3,6,7
4,3,6
5,7,8


In [83]:
print(df.to_string())

    A   B
0   1   4
1  17  28
2   7   8
3   6   7
4   3   6
5   7   8
