In [1]:
import pandas as pd

In [3]:
# Toy gradebook: one row per student with a raw score out of 100.
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Score': [92, 65, 70, 48],
})

# Map scores to letter grades using right-closed bins:
#   [0, 59] -> F, (59, 69] -> D, (69, 79] -> C, (79, 89] -> B, (89, 100] -> A
# include_lowest=True closes the first bin on the left so that a score of
# exactly 0 is graded 'F' instead of falling outside every bin (NaN).
df['grade'] = pd.cut(
    df['Score'],
    bins=[0, 59, 69, 79, 89, 100],
    labels=['F', 'D', 'C', 'B', 'A'],
    right=True,
    include_lowest=True,
)

In [4]:
print(df)

      name  Score grade
0    Alice     92     A
1      Bob     65     D
2  Charlie     70     C
3    David     48     F


In [6]:
import numpy as np


# Boolean masks for three non-overlapping score bands, highest first:
# 90 and above, 70 up to (but excluding) 90, and below 70.
conditions = [
    df['Score'].ge(90),
    df['Score'].ge(70) & df['Score'].lt(90),
    df['Score'].lt(70),
]

In [9]:
# Performance labels, matched by position with the masks in `conditions`
# (first label for the first mask, and so on).
choices = ['Excellent', 'Satisfactory', 'Poor']




In [10]:
# For each row, take the label of the first mask that is True; rows that
# match none of the masks get 'Unknown'.
df['performance'] = np.select(
    condlist=conditions,
    choicelist=choices,
    default='Unknown',
)

In [11]:
print(df)

      name  Score grade   performance
0    Alice     92     A      Exellent
1      Bob     65     D          Poor
2  Charlie     70     C  Satisfactory
3    David     48     F          Poor


In [12]:
# Fresh example frame (replaces the previous `df`): two groups, two values each.
df = pd.DataFrame(dict(
    group=['A', 'A', 'B', 'B'],
    value=[100, 150, 200, 250],
))

In [13]:
# Share of each row's value within its own group: value / group total.
# transform('sum') broadcasts the per-group sum back onto every row.
df['normalized'] = df['value'].div(
    df.groupby('group')['value'].transform('sum')
)

In [14]:
print(df)

  group  value  normalized
0     A    100    0.400000
1     A    150    0.600000
2     B    200    0.444444
3     B    250    0.555556


In [15]:
# Fresh example frame (replaces the previous `df`): student scores with ages.
df = pd.DataFrame(dict(
    name=['Alice', 'Bob', 'Charlie', 'David'],
    score=[92, 65, 70, 48],
    age=[20, 22, 21, 23],
))

In [16]:
# Mark rows with score above 70 AND age below 21 as 'Pass'.
# Rows that do not match are not assigned, so they stay NaN in the new column.
df.loc[df['score'].gt(70) & df['age'].lt(21), 'final_status'] = 'Pass'

In [17]:
print(df)

      name  score  age final_status
0    Alice     92   20         Pass
1      Bob     65   22          NaN
2  Charlie     70   21          NaN
3    David     48   23          NaN


In [18]:
# Fresh example frame (replaces the previous `df`): six consecutive days of sales.
df = pd.DataFrame(dict(
    day=pd.date_range(start='2025-01-01', periods=6, freq='D'),
    sales=[100, 200, 150, 130, 500, 300],
))

In [19]:
# Trailing 3-row mean of sales (current row plus the two before it).
# A full window is required, so the first two rows are NaN.
df['rolling_avg'] = df['sales'].rolling(window=3, min_periods=3).mean()

In [20]:
# A spike is a day whose sales exceed 1.5x the rolling average.
# Rows where the rolling average is NaN compare as False.
df['spike'] = df['sales'].gt(df['rolling_avg'].mul(1.5))

In [21]:
print(df)

         day  sales  rolling_avg  spike
0 2025-01-01    100          NaN  False
1 2025-01-02    200          NaN  False
2 2025-01-03    150        150.0  False
3 2025-01-04    130        160.0  False
4 2025-01-05    500        260.0   True
5 2025-01-06    300        310.0  False


In [22]:
# Fresh example frame (replaces the previous `df`): per-employee sales by department.
df = pd.DataFrame(dict(
    department=['HR', 'HR', 'Sales', 'Sales', 'Sales'],
    employee=['A', 'B', 'C', 'D', 'E'],
    sales=[50, 70, 100, 200, 250],
))

In [23]:
# Per department, flag rows whose sales are at or above that department's
# 75th-percentile sales value.
df['top_quartile'] = (
    df.groupby('department')['sales']
      .transform(lambda dept_sales: dept_sales.ge(dept_sales.quantile(0.75)))
)

In [24]:
print(df)

  department employee  sales  top_quartile
0         HR        A     50         False
1         HR        B     70          True
2      Sales        C    100         False
3      Sales        D    200         False
4      Sales        E    250          True


In [26]:
# True where a row's sales equal the maximum sales of its department
# (every row tied at the maximum is flagged).
df['is_top'] = df['sales'].eq(
    df.groupby('department')['sales'].transform('max')
)

In [27]:
print(df)

  department employee  sales  top_quartile  is_top
0         HR        A     50         False   False
1         HR        B     70          True    True
2      Sales        C    100         False   False
3      Sales        D    200         False   False
4      Sales        E    250          True    True


In [28]:
# Rank employees within each department, highest sales = rank 1.
# 'average' is pandas' default tie method, spelled out here; ranks are floats.
df['rank'] = (
    df.groupby('department')['sales']
      .rank(method='average', ascending=False)
)

In [29]:
print(df)

  department employee  sales  top_quartile  is_top  rank
0         HR        A     50         False   False   2.0
1         HR        B     70          True    True   1.0
2      Sales        C    100         False   False   3.0
3      Sales        D    200         False   False   2.0
4      Sales        E    250          True    True   1.0


In [30]:
# Mean sales of each row's department, broadcast back onto every row.
df['dept_avg'] = (
    df.groupby(by='department')['sales']
      .transform('mean')
)

In [31]:
# True where a row's sales are strictly above its department average.
# NOTE(review): the column name 'abovie_avg' looks like a typo for 'above_avg';
# it is kept unchanged because it is the key this frame exposes to later cells.
df['abovie_avg'] = df['sales'].gt(df['dept_avg'])

In [32]:
print(df)

  department employee  sales  top_quartile  is_top  rank    dept_avg  \
0         HR        A     50         False   False   2.0   60.000000   
1         HR        B     70          True    True   1.0   60.000000   
2      Sales        C    100         False   False   3.0  183.333333   
3      Sales        D    200         False   False   2.0  183.333333   
4      Sales        E    250          True    True   1.0  183.333333   

   abovie_avg  
0       False  
1        True  
2       False  
3        True  
4        True  


In [33]:
# Bucket each department's sales into four quantile-based bins labelled Q1..Q4.
# The binning is done separately per department.
df['quartile'] = (
    df.groupby('department')['sales']
      .transform(lambda dept_sales: pd.qcut(dept_sales, 4, labels=['Q1', 'Q2', 'Q3', 'Q4']))
)

In [34]:
print(df)

  department employee  sales  top_quartile  is_top  rank    dept_avg  \
0         HR        A     50         False   False   2.0   60.000000   
1         HR        B     70          True    True   1.0   60.000000   
2      Sales        C    100         False   False   3.0  183.333333   
3      Sales        D    200         False   False   2.0  183.333333   
4      Sales        E    250          True    True   1.0  183.333333   

   abovie_avg quartile  
0       False       Q1  
1        True       Q4  
2       False       Q1  
3        True       Q2  
4        True       Q4  


In [35]:
# Per department, flag rows whose sales are at or below that department's
# 20th-percentile sales value.
df['bottom_20'] = (
    df.groupby('department')['sales']
      .transform(lambda dept_sales: dept_sales.le(dept_sales.quantile(0.2)))
)

In [36]:
print(df)

  department employee  sales  top_quartile  is_top  rank    dept_avg  \
0         HR        A     50         False   False   2.0   60.000000   
1         HR        B     70          True    True   1.0   60.000000   
2      Sales        C    100         False   False   3.0  183.333333   
3      Sales        D    200         False   False   2.0  183.333333   
4      Sales        E    250          True    True   1.0  183.333333   

   abovie_avg quartile  bottom_20  
0       False       Q1       True  
1        True       Q4      False  
2       False       Q1       True  
3        True       Q2      False  
4        True       Q4      False  


In [37]:
# Per-department mean of sales, broadcast onto every row.
# This is the same computation as the existing 'dept_avg' column, stored under
# the name that the z-score calculation reads.
df['dept_mean'] = (
    df.groupby(by='department')['sales']
      .transform('mean')
)

In [41]:
# Per-department standard deviation of sales, broadcast onto every row.
# pandas' 'std' is the sample standard deviation (HR: [50, 70] -> 14.142136).
df['dept_std'] = (
    df.groupby(by='department')['sales']
      .transform('std')
)

In [38]:
# NOTE(review): the captured output below has no 'dept_std' column, and the
# cell above that adds it is numbered In [41] while this one is In [38] --
# the output was captured before that cell ran. Re-run top to bottom to refresh.
print(df)

  department employee  sales  top_quartile  is_top  rank    dept_avg  \
0         HR        A     50         False   False   2.0   60.000000   
1         HR        B     70          True    True   1.0   60.000000   
2      Sales        C    100         False   False   3.0  183.333333   
3      Sales        D    200         False   False   2.0  183.333333   
4      Sales        E    250          True    True   1.0  183.333333   

   abovie_avg quartile  bottom_20   dept_mean  
0       False       Q1       True   60.000000  
1        True       Q4      False   60.000000  
2       False       Q1       True  183.333333  
3        True       Q2      False  183.333333  
4        True       Q4      False  183.333333  


In [42]:
# Standardise sales within each department: (sales - dept mean) / dept std.
df['z_score'] = df['sales'].sub(df['dept_mean']).div(df['dept_std'])

In [43]:
print(df)

  department employee  sales  top_quartile  is_top  rank    dept_avg  \
0         HR        A     50         False   False   2.0   60.000000   
1         HR        B     70          True    True   1.0   60.000000   
2      Sales        C    100         False   False   3.0  183.333333   
3      Sales        D    200         False   False   2.0  183.333333   
4      Sales        E    250          True    True   1.0  183.333333   

   abovie_avg quartile  bottom_20   dept_mean   dept_std   z_score  
0       False       Q1       True   60.000000  14.142136 -0.707107  
1        True       Q4      False   60.000000  14.142136  0.707107  
2       False       Q1       True  183.333333  76.376262 -1.091089  
3        True       Q2      False  183.333333  76.376262  0.218218  
4        True       Q4      False  183.333333  76.376262  0.872872  


In [44]:
# Total sales of each row's department, broadcast back onto every row.
df['dept_total'] = (
    df.groupby(by='department')['sales']
      .transform('sum')
)

In [45]:
# Each employee's share of their department's total sales, as a percentage.
df['contribution_%'] = df['sales'].div(df['dept_total']).mul(100)

In [46]:
print(df)

  department employee  sales  top_quartile  is_top  rank    dept_avg  \
0         HR        A     50         False   False   2.0   60.000000   
1         HR        B     70          True    True   1.0   60.000000   
2      Sales        C    100         False   False   3.0  183.333333   
3      Sales        D    200         False   False   2.0  183.333333   
4      Sales        E    250          True    True   1.0  183.333333   

   abovie_avg quartile  bottom_20   dept_mean   dept_std   z_score  \
0       False       Q1       True   60.000000  14.142136 -0.707107   
1        True       Q4      False   60.000000  14.142136  0.707107   
2       False       Q1       True  183.333333  76.376262 -1.091089   
3        True       Q2      False  183.333333  76.376262  0.218218   
4        True       Q4      False  183.333333  76.376262  0.872872   

   dept_total  contribution_%  
0         120       41.666667  
1         120       58.333333  
2         550       18.181818  
3         550       36.363636  
4         550       45.454545  

In [49]:
# Flag each department's top seller(s) as bonus-eligible.
# method='min' gives every employee tied for first place rank 1, so all of
# them qualify. With the default 'average' method, tied leaders would share a
# fractional rank (e.g. 1.5) and `<= 1` would leave the department with no
# eligible employee at all.
df['bonus_eligible'] = (
    df.groupby('department')['sales'].rank(method='min', ascending=False) <= 1
)

In [50]:
print(df)

  department employee  sales  top_quartile  is_top  rank    dept_avg  \
0         HR        A     50         False   False   2.0   60.000000   
1         HR        B     70          True    True   1.0   60.000000   
2      Sales        C    100         False   False   3.0  183.333333   
3      Sales        D    200         False   False   2.0  183.333333   
4      Sales        E    250          True    True   1.0  183.333333   

   abovie_avg quartile  bottom_20   dept_mean   dept_std   z_score  \
0       False       Q1       True   60.000000  14.142136 -0.707107   
1        True       Q4      False   60.000000  14.142136  0.707107   
2       False       Q1       True  183.333333  76.376262 -1.091089   
3        True       Q2      False  183.333333  76.376262  0.218218   
4        True       Q4      False  183.333333  76.376262  0.872872   

   dept_total  contribution_%  bonus_eligible  
0         120       41.666667           False  
1         120       58.333333            True  
2         550       18.181818           False  
3         550       36.363636           False  
4         550       45.454545            True  

In [51]:
# Number of (non-null) employee entries in each row's department,
# broadcast back onto every row.
df['dept_size'] = (
    df.groupby(by='department')['employee']
      .transform('count')
)

In [52]:
# Keep only the rows that belong to departments with more than two employees.
filtered = df.loc[df['dept_size'].gt(2)]

In [53]:
print(filtered)

  department employee  sales  top_quartile  is_top  rank    dept_avg  \
2      Sales        C    100         False   False   3.0  183.333333   
3      Sales        D    200         False   False   2.0  183.333333   
4      Sales        E    250          True    True   1.0  183.333333   

   abovie_avg quartile  bottom_20   dept_mean   dept_std   z_score  \
2       False       Q1       True  183.333333  76.376262 -1.091089   
3        True       Q2      False  183.333333  76.376262  0.218218   
4        True       Q4      False  183.333333  76.376262  0.872872   

   dept_total  contribution_%  bonus_eligible  dept_size  
2         550       18.181818           False          3  
3         550       36.363636           False          3  
4         550       45.454545            True          3  


In [54]:
import pandas as pd

# Per-employee work metrics, three teams (Alpha: 3, Beta: 2, Gamma: 3 members).
df1 = pd.DataFrame(dict(
    team=['Alpha', 'Alpha', 'Alpha', 'Beta', 'Beta', 'Gamma', 'Gamma', 'Gamma'],
    employee=['Anna', 'Ben', 'Cara', 'Dan', 'Ella', 'Finn', 'Gail', 'Hank'],
    project_hours=[120, 80, 100, 90, 110, 130, 70, 100],
    bugs_fixed=[45, 30, 25, 40, 50, 60, 20, 30],
    features_added=[10, 8, 6, 5, 12, 15, 4, 6],
))


In [57]:
# Weighted effort score per employee:
#   0.5 x project hours + 1.5 x bugs fixed + 3 x features added
df1['effort_score'] = (
    df1['project_hours'].mul(0.5)
    .add(df1['bugs_fixed'].mul(1.5))
    .add(df1['features_added'].mul(3))
)

In [59]:
print(df1[['employee','effort_score']])

  employee  effort_score
0     Anna         157.5
1      Ben         109.0
2     Cara         105.5
3      Dan         120.0
4     Ella         166.0
5     Finn         200.0
6     Gail          77.0
7     Hank         113.0


In [62]:
# Percentile rank of each employee's effort score within their own team
# (ascending: the team's highest score gets 1.0). method/ascending are the
# pandas defaults, spelled out explicitly.
df1['team_percentile'] = (
    df1.groupby('team')['effort_score']
       .rank(method='average', ascending=True, pct=True)
)

In [63]:
print(df1)

    team employee  project_hours  bugs_fixed  features_added  effort_score  \
0  Alpha     Anna            120          45              10         157.5   
1  Alpha      Ben             80          30               8         109.0   
2  Alpha     Cara            100          25               6         105.5   
3   Beta      Dan             90          40               5         120.0   
4   Beta     Ella            110          50              12         166.0   
5  Gamma     Finn            130          60              15         200.0   
6  Gamma     Gail             70          20               4          77.0   
7  Gamma     Hank            100          30               6         113.0   

   team_percentile  
0         1.000000  
1         0.666667  
2         0.333333  
3         0.500000  
4         1.000000  
5         1.000000  
6         0.333333  
7         0.666667  
