### PANDAS SERIES

In [1]:
!pip3 install pandas

Collecting pandas
  Downloading pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl (10.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m17.9 MB/s[0m  [33m0:00:00[0meta [36m0:00:01[0m
[?25hDownloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, pandas
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [pandas]2m2/3[0m [pandas]
[1A[2KSuccessfully installed pandas-2.3.3 pytz-2025.2 tzdata-2025.2


In [2]:
# Importing
import numpy as np
import pandas as pd
import requests
from io import StringIO
import warnings

# Series
`Series` is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.). The axis labels are collectively referred to as the index. The basic method to create a Series is to call:
```python
s = pd.Series(data, index=index)
```

Here, **data** can be many different things:
* Python dict

* an ndarray

* a scalar value (like 5)

The passed index is a list of axis labels. Thus, this separates into a few cases depending on what data is:

**From ndarray**
  - If data is an ndarray, index must be the same length as data. If no index is passed, one will be created having values [0, ..., len(data) - 1].

**From Dict**
  - If an index is passed, the values in data corresponding to the labels in the index will be pulled out.

**From Scalar**
  - If data is a scalar value, an index must be provided. The value will be repeated to match the length of index.

In [3]:
# Pandas Series Creation
# Method 1: From a list

# Default index (0 .. n-1)
countries = ['India', "Pakistan", "US", "Russia", "UK"]
pd.Series(data=countries)

# Integer values
marks = [50, 60, 70, 80]
pd.Series(data=marks)

# Custom index labels
subject = ["Social Science", "Maths", "English", "Science"]
marks_series = pd.Series(data=marks, index=subject)

# Method 2: From a dictionary (the keys become the index)
marks = {'maths': 67, 'english': 57, 'science': 89, 'hindi': 100}
pd.Series(data=marks)

# Method 3: From a scalar (the value is repeated for every index label)
pd.Series(data=5, index=['A', 'B', 'C', 'D'])

A    5
B    5
C    5
D    5
dtype: int64

### Series Attribute
* name - returns the name of the series
* size - returns the number of elements in the series (like `ndarray.size`)
* dtype - returns the datatype of the series
* index - returns the index (axis labels) of the series as an `Index` object
* values - returns the values of the series as a NumPy array
* empty - returns `True` if the series has no elements


In [4]:
# Show the basic attributes of marks_series, one per line.
print(
    marks_series.size,
    marks_series.dtype,
    marks_series.index,
    marks_series.values,
    marks_series.empty,
    sep="\n",
)

4
int64
Index(['Social Science', 'Maths', 'English', 'Science'], dtype='object')
[50 60 70 80]
False


### Series import using CSV
- By default, pandas' `read_csv` returns a DataFrame. To load a single-column CSV as a Series instead, chain `.squeeze()` onto the result to convert the DataFrame into a Series.
- Note: before pandas 2.0, `squeeze` was a parameter of `read_csv` (`squeeze=True`); it was deprecated in pandas 1.4 and removed in 2.0.

In [5]:
def dataset(link, timeout=30, verify=False):
    """Download a text resource over HTTP(S) and return it as a file-like buffer.

    Parameters
    ----------
    link : str
        URL of the resource to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely. Defaults to 30.
    verify : bool, optional
        Whether to verify the server's TLS certificate. Defaults to ``False``
        to keep this notebook's existing behaviour.
        NOTE(review): with verification off the download can be tampered with
        in transit; pass ``verify=True`` wherever the environment allows it.

    Returns
    -------
    io.StringIO
        In-memory text buffer holding the response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    RuntimeError
        If the server answers with any other non-200 status.
    """
    # Scope the warning filter to this call instead of silencing the warning
    # for the whole process as a side effect.
    with warnings.catch_warnings():
        if not verify:
            warnings.filterwarnings('ignore', message='Unverified HTTPS request')
        response = requests.get(link, verify=verify, timeout=timeout)

    # Fail loudly here: silently returning None would only surface later as a
    # confusing error inside the caller (e.g. pd.read_csv(None)).
    response.raise_for_status()
    if response.status_code != 200:
        raise RuntimeError(
            f"Unexpected HTTP status {response.status_code} while downloading {link}"
        )
    return StringIO(response.text)

In [6]:
# Dataset - 1: the "Subscribers gained" column, loaded as a Series.
# squeeze("columns") collapses only the column axis, so the result stays a Series
# even for a one-row file (a bare squeeze() would reduce that case to a scalar).
subs = pd.read_csv(
    dataset("https://drive.google.com/uc?export=download&id=1XQTnOAlodSzEqIQ0sJjnOv5zpG57OB4S")
).squeeze("columns")
subs

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [7]:
# Dataset - 2: runs per match, indexed by match number.
# squeeze("columns") collapses only the column axis, so the result stays a Series
# even for a one-row file (a bare squeeze() would reduce that case to a scalar).
vk = pd.read_csv(
    dataset("https://drive.google.com/uc?export=download&id=19RVdLnwpCEO3GHzA3LuU2JhG7Or3UjFb"),
    index_col="match_no",
).squeeze("columns")

In [8]:
# Dataset - 3: lead actor per movie, indexed by movie title.
# squeeze("columns") collapses only the column axis, so the result stays a Series
# even for a one-row file (a bare squeeze() would reduce that case to a scalar).
movies = pd.read_csv(
    dataset("https://drive.google.com/uc?export=download&id=1H6XVxrhbinfe44s-ZHXKaGZF6gW_Qmzg"),
    index_col="movie",
).squeeze("columns")
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

### Series Method
* head() -> returns the first 5 values of the series (by default)
* tail() -> returns the last 5 values of the series (by default)
* sample() -> returns a random entry from the series
* value_counts() -> returns the frequency of every distinct value in the series
* sort_values() -> returns the values in sorted order -> ascending by default -> not in-place (returns a new Series)
* sort_index() -> returns the series sorted by its index -> ascending by default -> not in-place (returns a new Series)

In [9]:
# First five entries (n=5 is the default).
movies.head(n=5)

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
Name: lead, dtype: object

In [10]:
# Last two entries.
movies.tail(n=2)

movie
Company (film)            Ajay Devgn
Awara Paagal Deewana    Akshay Kumar
Name: lead, dtype: object

In [11]:
# One random entry (n=1 is the default) ...
movies.sample(n=1)
# ... or ask for several at once; only this last result is displayed.
movies.sample(n=3)

movie
Chori Chori (2003 film)          Ajay Devgn
Badla (2019 film)          Amitabh Bachchan
Memories in March              Deepti Naval
Name: lead, dtype: object

In [12]:
# Frequency of each distinct value, most frequent first.
# Only the last expression in a cell is displayed, so the vk result is computed but not shown.
vk.value_counts()
movies.value_counts()

lead
Akshay Kumar        48
Amitabh Bachchan    45
Ajay Devgn          38
Salman Khan         31
Sanjay Dutt         26
                    ..
Diganth              1
Parveen Kaur         1
Seema Azmi           1
Akanksha Puri        1
Edwin Fernandes      1
Name: count, Length: 566, dtype: int64

In [13]:
# Largest value: sort in descending order and take the first element by position.
subs.sort_values(ascending=False).iloc[0]

np.int64(396)

In [14]:
# Sort by index label (movie title), ascending; returns a new Series.
movies.sort_index(ascending=True)

movie
1920 (film)                   Rajniesh Duggall
1920: London                     Sharman Joshi
1920: The Evil Returns             Vicky Ahuja
1971 (2007 film)                Manoj Bajpayee
2 States (2014 film)              Arjun Kapoor
                                   ...        
Zindagi 50-50                      Veena Malik
Zindagi Na Milegi Dobara        Hrithik Roshan
Zindagi Tere Naam           Mithun Chakraborty
Zokkomon                       Darsheel Safary
Zor Lagaa Ke...Haiya!            Meghan Jadhav
Name: lead, Length: 1500, dtype: object

### Series Maths Method

* count
* sum, product
* mean,median,mode,std, var
* min
* max
* describe

In [15]:
# count() gives the number of non-missing entries in the series.
print(vk.count())

215


In [16]:
# Total of all values in subs.
print(subs.sum())

49510


In [20]:
# Central tendency: mean and median of subs, and the mode of movies.
# mode() returns a Series because several values can tie for most frequent.
print(subs.mean(), subs.median(), movies.mode(), sep="\n")

135.64383561643837
123.0
0    Akshay Kumar
Name: lead, dtype: object


In [22]:
# Smallest and largest value in subs, one per line.
print(subs.min(), subs.max(), sep="\n")

33
396


In [23]:
# Summary statistics in one call: count, mean, std, min, quartiles and max.
subs.describe()

count    365.000000
mean     135.643836
std       62.675023
min       33.000000
25%       88.000000
50%      123.000000
75%      177.000000
max      396.000000
Name: Subscribers gained, dtype: float64

### Indexing & Slicing

In [24]:
# Indexing by label: vk is indexed by match_no, so 3 is looked up as a label.
vk.loc[3]

np.int64(13)

In [27]:
# Last element by position. movies has string labels, so movies[-1] only works through
# the deprecated "integer key treated as position" fallback; .iloc is the explicit form.
movies.iloc[-1]

  movies[-1]


'Akshay Kumar'

In [28]:
# Positional slice: rows at positions 5 through 18 (the end is exclusive).
vk.iloc[5:19]

match_no
6      9
7     34
8      0
9     21
10     3
11    10
12    38
13     3
14    11
15    50
16     2
17    22
18    19
19    16
Name: runs, dtype: int64

In [29]:
# Select several entries at once by their match_no labels.
vk.loc[[3, 6, 7, 15]]

match_no
3     13
6      9
7     34
15    50
Name: runs, dtype: int64

In [30]:
# Look up a single entry by its index label (the movie title).
movies.loc['1920 (film)']

'Rajniesh Duggall'

### Series with Python Functionalities


In [36]:
# Python built-ins work directly on a Series: len / type / min / max / sorted
print(
    len(subs),
    type(movies),
    min(subs),
    max(subs),
    sorted(vk),
    sep="\n",
)

365
<class 'pandas.core.series.Series'>
33
396
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 16, 16, 16, 16, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 22, 22, 23, 23, 23, 23, 24, 24, 24, 25, 25, 25, 25, 25, 27, 27, 27, 28, 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 37, 38, 38, 38, 38, 39, 39, 39, 41, 41, 41, 42, 42, 43, 43, 44, 45, 46, 48, 48, 48, 48, 49, 50, 50, 51, 51, 51, 52, 53, 54, 54, 55, 56, 56, 57, 57, 57, 58, 58, 58, 58, 62, 62, 64, 65, 67, 67, 67, 68, 70, 70, 71, 72, 72, 73, 73, 73, 75, 75, 79, 80, 82, 84, 90, 92, 93, 99, 100, 100, 108, 109, 113]


In [37]:
# Type conversion: list() iterates over the values of the series (the index is dropped).
list(marks_series)

[50, 60, 70, 80]

In [38]:
# Type conversion: dict() maps each index label to its value.
dict(marks_series)

{'Social Science': np.int64(50),
 'Maths': np.int64(60),
 'English': np.int64(70),
 'Science': np.int64(80)}

In [42]:
# Membership operator
# `in` on a Series tests the index labels; use .values to test the data instead.
# Only the last expression is displayed, so the first result is computed but not shown.
'Akshay Kumar' in movies.values
'1920 (film)' in movies

True

In [44]:
# Arithmetic operator - broadcasting: the scalar 100 is applied to every element.
100 - marks_series

Social Science    50
Maths             40
English           30
Science           20
dtype: int64

In [46]:
# Relational operator: `vk > 50` builds a boolean mask, which then filters the series.
vk.loc[vk > 50]

match_no
34      58
41      71
44      56
45      67
52      70
57      57
68      73
71      51
73      58
74      65
80      57
81      93
82      99
85      56
97      67
99      73
103     51
104     62
110     82
116     75
117     79
119     80
120    100
122     52
123    108
126    109
127     75
128    113
129     54
131     54
132     62
134     64
137     55
141     58
144     57
145     92
148     68
152     70
160     84
162     67
164    100
175     72
178     90
188     72
197     51
198     53
209     58
213     73
Name: runs, dtype: int64