# Ex 24 - Selective Updating

In [15]:
import pandas as pd
import numpy as np
from pandas import DataFrame, Series

In [16]:
temps = pd.read_csv('/Users/blauerbock/workspaces/pandas-workout/data/nyc-temps.txt').squeeze()
len_hours = int(728/8)
hours = [0,3,6,9,12,15,18,21] * len_hours
df = DataFrame({'temp': temps,
               'hour': hours})
df.shape

(728, 2)

In [17]:
df

Unnamed: 0,temp,hour
0,-1,0
1,-1,3
2,-1,6
3,-1,9
4,-1,12
...,...,...
723,2,9
724,2,12
725,2,15
726,2,18


In [19]:
df[df['temp'] < 0]['temp'] = 0  # WRONG

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[df['temp'] < 0]['temp'] = 0  # WRONG


In [20]:
df.loc[df['temp'] < 0, 'temp'] = 0

In [21]:
df

Unnamed: 0,temp,hour
0,0,0
1,0,3
2,0,6
3,0,9
4,0,12
...,...,...
723,2,9
724,2,12
725,2,15
726,2,18


## Set all the odd temperatures to the mean of all the temperatures.

In [29]:
temps = pd.read_csv('/Users/blauerbock/workspaces/pandas-workout/data/nyc-temps.txt').squeeze()
len_hours = int(728/8)
hours = [0,3,6,9,12,15,18,21] * len_hours
df = DataFrame({'temp': temps * 1.0,
               'hour': hours})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 728 entries, 0 to 727
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   temp    728 non-null    float64
 1   hour    728 non-null    int64  
dtypes: float64(1), int64(1)
memory usage: 11.5 KB


In [30]:
mean_of_all_temps = df['temp'].mean()
mean_of_all_temps

-1.0508241758241759

In [31]:
df.loc[df['temp'] % 2 != 0,'temp'] = mean_of_all_temps

In [32]:
df

Unnamed: 0,temp,hour
0,-1.050824,0
1,-1.050824,3
2,-1.050824,6
3,-1.050824,9
4,-1.050824,12
...,...,...
723,2.000000,9
724,2.000000,12
725,2.000000,15
726,2.000000,18


## Set the even temperatures at hours 9 and 18 to 3.

In [58]:
temps = pd.read_csv('/Users/blauerbock/workspaces/pandas-workout/data/nyc-temps.txt').squeeze()
len_hours = int(728/8)
hours = [0,3,6,9,12,15,18,21] * len_hours
df = DataFrame({'temp': temps,
               'hour': hours})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 728 entries, 0 to 727
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   temp    728 non-null    int64
 1   hour    728 non-null    int64
dtypes: int64(2)
memory usage: 11.5 KB


In [68]:
df.loc[df['temp'] % 2 == 0 & df['hour'].isin([9,18]),'temp'] = 3

In [72]:
df.loc[(df['temp'] == 3) & df['hour'].isin([9,18])]

Unnamed: 0,temp,hour
35,3,9
38,3,18
43,3,9
46,3,18
51,3,9
...,...,...
686,3,18
715,3,9
718,3,18
723,3,9


In [66]:
df.loc[df['temp'] % 2 == 0][df['hour'].isin([9,18])]

  df.loc[df['temp'] % 2 == 0][df['hour'].isin([9,18])]


Unnamed: 0,temp,hour
35,8,9
38,8,18
43,4,9
46,4,18
67,-2,9
...,...,...
686,-10,18
715,-4,9
718,-4,18
723,2,9


In [56]:
df.loc[
    df['hour'].isin([9, 18]), 
    'temp'] = 3

df

Unnamed: 0,temp,hour
0,-1,0
1,-1,3
2,-1,6
3,3,9
4,-1,12
...,...,...
723,3,9
724,2,12
725,2,15
726,3,18


In [57]:
df.loc[((df['temp'] % 2 == 0) & df['hour'].isin([9,18]))]  #, 'temp'] == 3

Unnamed: 0,temp,hour


In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 728 entries, 0 to 727
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   temp    728 non-null    int64
 1   hour    728 non-null    int64
dtypes: int64(2)
memory usage: 11.5 KB


In [49]:
(df['temp'] % 2 == 0)

TypeError: 'int' object is not subscriptable

## If the hour is odd, set the temperature to 5.

In [73]:
temps = pd.read_csv('/Users/blauerbock/workspaces/pandas-workout/data/nyc-temps.txt').squeeze()
len_hours = int(728/8)
hours = [0,3,6,9,12,15,18,21] * len_hours
df = DataFrame({'temp': temps,
               'hour': hours})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 728 entries, 0 to 727
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   temp    728 non-null    int64
 1   hour    728 non-null    int64
dtypes: int64(2)
memory usage: 11.5 KB


In [78]:
df.loc[df['hour'] % 2 == 1, 'temp'] = 5

In [79]:
df.loc[df['hour'] % 2 == 1, 'temp']

1      5
3      5
5      5
7      5
9      5
      ..
719    5
721    5
723    5
725    5
727    5
Name: temp, Length: 364, dtype: int64