In [1]:
# pandas: data analysis and manipulation library
import pandas as pd

Indexing with Pandas involves accessing specific rows and columns of a DataFrame or Series. Here are some common ways to do indexing in Pandas:

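1. **Accessing Columns**: You can access a column in a DataFrame using square brackets or dot notation. Dot notation only works when the column name is a valid Python identifier and does not clash with an existing DataFrame attribute or method, so square brackets are the safer choice.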

    ```python
    df['Column_Name']
    df.Column_Name
    ```

2. **Accessing Rows by Label**: You can use `.loc[]` to access rows by their label (index).

    ```python
    df.loc['Label']
    ```

3. **Accessing Rows by Integer Location**: You can use `.iloc[]` to access rows by their integer location.

    ```python
    df.iloc[integer_location]
    ```

4. **Accessing Specific Cells**: You can access a specific cell by specifying both the row and column.

    ```python
    df.loc['Label', 'Column_Name']
    ```

5. **Boolean Indexing**: You can use boolean conditions to filter rows based on certain criteria.

    ```python
    df[df['Column_Name'] > 10]
    ```

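6. **Chained Indexing**: You can chain indexing operations together to read a value. Avoid chained indexing when assigning, because the assignment may act on a temporary copy (pandas warns with `SettingWithCopyWarning`); use a single `.loc[row, column]` call instead.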

    ```python
    df.loc['Label']['Column_Name']
    ```


In [3]:
# Five values labelled 'a' through 'e'; the single 12.5 makes the whole Series float64.
series = pd.Series(
    data=[10, 12, 11, 12.5, 15],
    index=['a', 'b', 'c', 'd', 'e'],
)
print(series)

a    10.0
b    12.0
c    11.0
d    12.5
e    15.0
dtype: float64


In [5]:
# Small demo frame: one row per student, built column by column.
df = pd.DataFrame({
    'name': ['ravi', 'subhansh', 'saket'],
    'marks': [10, 18, 8],
})
print(df)

       name  marks
0      ravi     10
1  subhansh     18
2     saket      8


In [13]:
# Load the Bollywood movies CSV and use its 'Rank' column as the row index.
# NOTE(review): the path is absolute and specific to one machine, so this cell
# will not run elsewhere; consider a path relative to a configurable data directory.
movies = pd.read_csv(r'/Users/amirkhan/Python Development/python academic/Batch2023-24/module2/pandas basics/Bollywood_movies.csv', index_col='Rank')
# print() emits the plain-text rendering of the whole frame (wide frames wrap across lines).
print(movies)

      Peak                         Film  Year               Director  \
Rank                                                                   
1        1                       Dangal  2016          Nitesh Tiwari   
2        1  Baahubali 2: The Conclusion  2017        S. S. Rajamouli   
3        3            Bajrangi Bhaijaan  2015             Kabir Khan   
4        3             Secret Superstar  2017         Advait Chandan   
5        1                           PK  2014        Rajkumar Hirani   
6        5                          2.0  2018              S.Shankar   
7        2     Baahubali: The Beginning  2015        S. S. Rajamouli   
8        4                       Sultan  2016        Ali Abbas Zafar   
9        8                        Sanju  2018        Rajkumar Hirani   
10       7                    Padmaavat  2018  Sanjay Leela Bhansali   
11       8              Tiger Zinda Hai  2017        Ali Abbas Zafar   
12       1                      Dhoom 3  2013  Vijay Krishna Ach

In [15]:
# Indexing with a list of column labels: selects the 'Film' and 'Director'
# columns together; a list selection returns a DataFrame.
data = movies[['Film', 'Director']]
# Show the selection followed by its Python type.
print(data, type(data))

                             Film               Director
Rank                                                    
1                          Dangal          Nitesh Tiwari
2     Baahubali 2: The Conclusion        S. S. Rajamouli
3               Bajrangi Bhaijaan             Kabir Khan
4                Secret Superstar         Advait Chandan
5                              PK        Rajkumar Hirani
6                             2.0              S.Shankar
7        Baahubali: The Beginning        S. S. Rajamouli
8                          Sultan        Ali Abbas Zafar
9                           Sanju        Rajkumar Hirani
10                      Padmaavat  Sanjay Leela Bhansali
11                Tiger Zinda Hai        Ali Abbas Zafar
12                        Dhoom 3  Vijay Krishna Acharya
13                            War        Siddharth Anand
14                       3 Idiots        Rajkumar Hirani
15                      Andhadhun        Sriram Raghavan
16                          Saa

In [19]:
# .loc[rows, columns]: the bare ':' keeps every row, the list picks the 'Film' and 'Year' columns.
movies.loc[:, ['Film', 'Year']]

Unnamed: 0_level_0,Film,Year
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Dangal,2016
2,Baahubali 2: The Conclusion,2017
3,Bajrangi Bhaijaan,2015
4,Secret Superstar,2017
5,PK,2014
6,2.0,2018
7,Baahubali: The Beginning,2015
8,Sultan,2016
9,Sanju,2018
10,Padmaavat,2018


In [36]:
# Boolean mask on the row axis (Year greater than 2016) combined with a column list:
# only matching rows are kept, restricted to 'Film', 'Producer' and 'Year'.
movies.loc[movies['Year'] > 2016, ['Film', 'Producer', 'Year']]

Unnamed: 0_level_0,Film,Producer,Year
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,Baahubali 2: The Conclusion,Arka Media Works,2017
4,Secret Superstar,Aamir Khan Productions,2017
6,2.0,Lyca Productions,2018
9,Sanju,Rajkumar Hirani FilmsVinod Chopra Films,2018
10,Padmaavat,Bhansali Productions Viacom 18 Motion Pictures,2018
11,Tiger Zinda Hai,Yash Raj Films,2017
13,War,Yash Raj Films,2019
15,Andhadhun,Viacom 18 Motion Pictures Matchbox Pictures,2018
16,Saaho,UV Creations T-Series,2019
20,Simmba,Reliance Entertainment Dharma Productions,2018


In [33]:
import numpy as np

ar = np.array([10, 5, 3, 11, 12, 8, 6])
# Comparing an array with a scalar is element-wise and yields a boolean array;
# this is the kind of mask used for boolean indexing.
print(np.equal(ar, 10))

[ True False False False False False False]


In [1]:
import pandas as pd

In [40]:
# Mixing int and float literals in a column upcasts the whole column to float64.
df = pd.DataFrame({
    'one': [1, 1.0, 0],
    'two': [2, 2.0, 0],
})
print(df)

   one  two
0  1.0  2.0
1  1.0  2.0
2  0.0  0.0


In [6]:
# A list of labels inside [] selects columns and returns a DataFrame, even for a single label.
# Only the last expression of a cell is displayed, so the result of the first line is not shown.
# NOTE(review): the recorded output has two rows while the df built in the previous cell
# has three - presumably the cells were run out of order; re-run top to bottom to confirm.
df[['one']]  # valid
df[['one', 'two']]  # valid

Unnamed: 0,one,two
0,1.0,2.0
1,1.0,2.0


In [16]:
# Label slices with .loc include both endpoints. The recorded output contains just the
# rows that exist, with no exception raised.
df.loc[0:10]  # no error even though the stop label 10 is not present in the index

Unnamed: 0,one,two
0,1.0,2.0
1,1.0,2.0


In [13]:
# Scalar row label plus a label slice over the columns ('one' through 'two', inclusive):
# the result is a Series for row 0.
df.loc[0, 'one':'two']

one    1.0
two    2.0
Name: 0, dtype: float64

In [49]:
# df.items() yields (column label, column) pairs; show each label and the label's type.
for column_label, _column in df.items():
    print(column_label, type(column_label))

one <class 'str'>
two <class 'str'>


In [53]:
# df.iterrows() yields (row label, row) pairs; show each row and the row's type.
for _row_label, row in df.iterrows():
    print(row, type(row))

one    1.0
two    2.0
Name: 0, dtype: float64 <class 'pandas.core.series.Series'>
one    1.0
two    2.0
Name: 1, dtype: float64 <class 'pandas.core.series.Series'>
one    0.0
two    0.0
Name: 2, dtype: float64 <class 'pandas.core.series.Series'>


In [14]:
# Label slice on the row axis using strings; the recorded output is an empty frame
# that still carries the 'one' and 'two' columns.
# NOTE(review): 'one' and 'three' look like column-style labels rather than row labels -
# confirm whether a column slice (df.loc[:, ...]) was intended.
df.loc['one':'three']

Unnamed: 0,one,two


In [9]:
# Row label slice from 0 through 1; with .loc both endpoints are included.
df.loc[0:1]

Unnamed: 0,one,two
0,1.0,2.0
1,1.0,2.0


In [21]:
# Row label slice from 0 through 1 (inclusive).
# NOTE(review): this cell repeats the expression of the cell directly before it;
# one of the two can probably be removed.
df.loc[0:1]

Unnamed: 0,one,two
0,1.0,2.0
1,1.0,2.0


In [22]:
# Inspect the row index of the frame.
# NOTE(review): the recorded output reports stop=2, but the most recent df defined above
# this cell has three rows - the outputs were presumably produced out of order;
# re-run the notebook top to bottom to confirm.
df.index

RangeIndex(start=0, stop=2, step=1)

In [23]:
# Same two columns as before, but with explicit, unsorted integer row labels 3 and 2.
df = pd.DataFrame(
    {'one': [1, 1.0], 'two': [2, 2.0]},
    index=[3, 2],
)
print(df)

   one  two
3  1.0  2.0
2  1.0  2.0


In [26]:
# With explicit labels the index is a plain integer Index that keeps the given order (3, then 2).
df.index

Index([3, 2], dtype='int64')

In [55]:
# Directory prefix for the data files (note the trailing slash).
# NOTE(review): absolute, machine-specific path - this cell will not run on another
# machine; consider a path relative to a configurable data directory.
path = r'/Users/amirkhan/Python Development/python academic/Batch2023-24/module2/pandas basics/'
# Plain string concatenation only produces a valid file path because `path` ends with '/'.
# NOTE(review): this rebinds `df`, which earlier cells used for the small demo frames.
df = pd.read_csv(path + 'nba-2.csv')
# Plain-text rendering of the frame; long frames are abbreviated with '..' rows.
print(df)

              Name            Team  Number Position   Age Height  Weight  \
0    Avery Bradley  Boston Celtics     0.0       PG  25.0    6-2   180.0   
1      Jae Crowder  Boston Celtics    99.0       SF  25.0    6-6   235.0   
2     John Holland  Boston Celtics    30.0       SG  27.0    6-5   205.0   
3      R.J. Hunter  Boston Celtics    28.0       SG  22.0    6-5   185.0   
4    Jonas Jerebko  Boston Celtics     8.0       PF  29.0   6-10   231.0   
..             ...             ...     ...      ...   ...    ...     ...   
453   Shelvin Mack       Utah Jazz     8.0       PG  26.0    6-3   203.0   
454      Raul Neto       Utah Jazz    25.0       PG  24.0    6-1   179.0   
455   Tibor Pleiss       Utah Jazz    21.0        C  26.0    7-3   256.0   
456    Jeff Withey       Utah Jazz    24.0        C  26.0    7-0   231.0   
457            NaN             NaN     NaN      NaN   NaN    NaN     NaN   

               College     Salary  
0                Texas  7730337.0  
1            Ma

In [56]:
# Rows whose Position equals 'SG', keeping only the 'Name' column;
# a single column label (not a list) yields a Series.
df.loc[df['Position']=='SG', 'Name']

2          John Holland
3           R.J. Hunter
12          Evan Turner
13          James Young
15     Bojan Bogdanovic
             ...       
433    Gerald Henderson
437       C.J. McCollum
438        Luis Montero
444          Alec Burks
449         Rodney Hood
Name: Name, Length: 102, dtype: object

In [37]:
# Iterating a DataFrame walks its column labels, one per line.
for column_name in df.columns:
    print(column_name)

# Number of rows in the frame.
print(df.shape[0])

Name
Team
Number
Position
Age
Height
Weight
College
Salary
458


In [62]:
# Two conditions combined element-wise with & (each wrapped in parentheses because
# & binds tighter than the comparisons): ages strictly between 20 and 28,
# returning just the 'Age' column.
df.loc[(df['Age'] > 20) & (df['Age'] < 28), 'Age']

0      25.0
1      25.0
2      27.0
3      22.0
6      21.0
       ... 
451    26.0
453    26.0
454    24.0
455    26.0
456    26.0
Name: Age, Length: 255, dtype: float64