In [212]:
import pandas as pd
import numpy as np
import datetime as dt
np.random.seed(338)

build_data = pd.DataFrame({'Integer': np.random.randint(0, 10, 10),
                           'Float': np.random.rand(10),
                           'String/Text':["numpy","pandas","PyTorch",
                                          "TensorFlow", "matplotlib","Dash",
                                          "colab","datetime","silly",
                                          "LLM"
                                          ],
                           "True or False": [np.random.choice([True,False]) for b in range(10)]
                           }
                        )


In [213]:
build_data

Unnamed: 0,Integer,Float,String/Text,True or False
0,8,0.478767,numpy,True
1,5,0.929037,pandas,False
2,5,0.480968,PyTorch,True
3,9,0.380756,TensorFlow,False
4,4,0.630398,matplotlib,True
5,5,0.710537,Dash,True
6,3,0.187097,colab,False
7,5,0.689032,datetime,True
8,7,0.51602,silly,True
9,0,0.285548,LLM,True


In [214]:
date_index = pd.date_range(start='1/1/2024', periods=10, freq='D')

In [215]:
build_data.set_index(date_index, inplace=True)
build_data

Unnamed: 0,Integer,Float,String/Text,True or False
2024-01-01,8,0.478767,numpy,True
2024-01-02,5,0.929037,pandas,False
2024-01-03,5,0.480968,PyTorch,True
2024-01-04,9,0.380756,TensorFlow,False
2024-01-05,4,0.630398,matplotlib,True
2024-01-06,5,0.710537,Dash,True
2024-01-07,3,0.187097,colab,False
2024-01-08,5,0.689032,datetime,True
2024-01-09,7,0.51602,silly,True
2024-01-10,0,0.285548,LLM,True


In [216]:
build_data.to_csv("build_data.csv",index=False)


In [217]:
data = pd.read_csv("build_data.csv")

In [218]:
data.head()

Unnamed: 0,Integer,Float,String/Text,True or False
0,8,0.478767,numpy,True
1,5,0.929037,pandas,False
2,5,0.480968,PyTorch,True
3,9,0.380756,TensorFlow,False
4,4,0.630398,matplotlib,True


In [219]:
build_data.to_csv("build_data.csv",index=True)
data = pd.read_csv("build_data.csv",index_col=0)
data.head()

Unnamed: 0,Integer,Float,String/Text,True or False
2024-01-01,8,0.478767,numpy,True
2024-01-02,5,0.929037,pandas,False
2024-01-03,5,0.480968,PyTorch,True
2024-01-04,9,0.380756,TensorFlow,False
2024-01-05,4,0.630398,matplotlib,True


In [220]:
data.tail(6)

Unnamed: 0,Integer,Float,String/Text,True or False
2024-01-05,4,0.630398,matplotlib,True
2024-01-06,5,0.710537,Dash,True
2024-01-07,3,0.187097,colab,False
2024-01-08,5,0.689032,datetime,True
2024-01-09,7,0.51602,silly,True
2024-01-10,0,0.285548,LLM,True


In [221]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 2024-01-01 to 2024-01-10
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Integer        10 non-null     int64  
 1   Float          10 non-null     float64
 2   String/Text    10 non-null     object 
 3   True or False  10 non-null     bool   
dtypes: bool(1), float64(1), int64(1), object(1)
memory usage: 330.0+ bytes


In [222]:
data["Integer"].iloc[0]

8

In [223]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 2024-01-01 to 2024-01-10
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Integer        10 non-null     int64  
 1   Float          10 non-null     float64
 2   String/Text    10 non-null     object 
 3   True or False  10 non-null     bool   
dtypes: bool(1), float64(1), int64(1), object(1)
memory usage: 330.0+ bytes


In [224]:
try_sort = data.sort_values(by="Integer",ascending=False)
try_sort

Unnamed: 0,Integer,Float,String/Text,True or False
2024-01-04,9,0.380756,TensorFlow,False
2024-01-01,8,0.478767,numpy,True
2024-01-09,7,0.51602,silly,True
2024-01-02,5,0.929037,pandas,False
2024-01-03,5,0.480968,PyTorch,True
2024-01-06,5,0.710537,Dash,True
2024-01-08,5,0.689032,datetime,True
2024-01-05,4,0.630398,matplotlib,True
2024-01-07,3,0.187097,colab,False
2024-01-10,0,0.285548,LLM,True


In [225]:
data.describe()

Unnamed: 0,Integer,Float
count,10.0,10.0
mean,5.1,0.528816
std,2.558211,0.219094
min,0.0,0.187097
25%,4.25,0.405258
50%,5.0,0.498494
75%,6.5,0.674373
max,9.0,0.929037


In [226]:
data.set_index(data["Integer"],inplace=False)

Unnamed: 0_level_0,Integer,Float,String/Text,True or False
Integer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
8,8,0.478767,numpy,True
5,5,0.929037,pandas,False
5,5,0.480968,PyTorch,True
9,9,0.380756,TensorFlow,False
4,4,0.630398,matplotlib,True
5,5,0.710537,Dash,True
3,3,0.187097,colab,False
5,5,0.689032,datetime,True
7,7,0.51602,silly,True
0,0,0.285548,LLM,True


In [227]:
data["String/Text"].apply(lambda s:s.title())

2024-01-01         Numpy
2024-01-02        Pandas
2024-01-03       Pytorch
2024-01-04    Tensorflow
2024-01-05    Matplotlib
2024-01-06          Dash
2024-01-07         Colab
2024-01-08      Datetime
2024-01-09         Silly
2024-01-10           Llm
Name: String/Text, dtype: object

In [228]:
data.head()

Unnamed: 0,Integer,Float,String/Text,True or False
2024-01-01,8,0.478767,numpy,True
2024-01-02,5,0.929037,pandas,False
2024-01-03,5,0.480968,PyTorch,True
2024-01-04,9,0.380756,TensorFlow,False
2024-01-05,4,0.630398,matplotlib,True


In [229]:
data.rename(columns = {'String/Text': 'Library/Framework'}, inplace=True)
data

Unnamed: 0,Integer,Float,Library/Framework,True or False
2024-01-01,8,0.478767,numpy,True
2024-01-02,5,0.929037,pandas,False
2024-01-03,5,0.480968,PyTorch,True
2024-01-04,9,0.380756,TensorFlow,False
2024-01-05,4,0.630398,matplotlib,True
2024-01-06,5,0.710537,Dash,True
2024-01-07,3,0.187097,colab,False
2024-01-08,5,0.689032,datetime,True
2024-01-09,7,0.51602,silly,True
2024-01-10,0,0.285548,LLM,True


In [230]:
data.drop(columns=["True or False"],inplace=False)

Unnamed: 0,Integer,Float,Library/Framework
2024-01-01,8,0.478767,numpy
2024-01-02,5,0.929037,pandas
2024-01-03,5,0.480968,PyTorch
2024-01-04,9,0.380756,TensorFlow
2024-01-05,4,0.630398,matplotlib
2024-01-06,5,0.710537,Dash
2024-01-07,3,0.187097,colab
2024-01-08,5,0.689032,datetime
2024-01-09,7,0.51602,silly
2024-01-10,0,0.285548,LLM


In [231]:
true_data = data[data["True or False"] == True]
true_data

Unnamed: 0,Integer,Float,Library/Framework,True or False
2024-01-01,8,0.478767,numpy,True
2024-01-03,5,0.480968,PyTorch,True
2024-01-05,4,0.630398,matplotlib,True
2024-01-06,5,0.710537,Dash,True
2024-01-08,5,0.689032,datetime,True
2024-01-09,7,0.51602,silly,True
2024-01-10,0,0.285548,LLM,True


In [232]:
data["True or False"].value_counts()

True or False
True     7
False    3
Name: count, dtype: int64

In [233]:
libs = ["numpy","pandas","PyTorch","TensorFlow", "matplotlib","Dash","datetime"]

In [234]:
check_lib = data["Library/Framework"].apply(lambda x: x in libs)
check_lib

2024-01-01     True
2024-01-02     True
2024-01-03     True
2024-01-04     True
2024-01-05     True
2024-01-06     True
2024-01-07    False
2024-01-08     True
2024-01-09    False
2024-01-10    False
Name: Library/Framework, dtype: bool

In [235]:
check_lib.name = "Is It Library?"
check_lib

2024-01-01     True
2024-01-02     True
2024-01-03     True
2024-01-04     True
2024-01-05     True
2024-01-06     True
2024-01-07    False
2024-01-08     True
2024-01-09    False
2024-01-10    False
Name: Is It Library?, dtype: bool

In [236]:
data.drop(columns=["True or False"],inplace=True)


In [237]:
data

Unnamed: 0,Integer,Float,Library/Framework
2024-01-01,8,0.478767,numpy
2024-01-02,5,0.929037,pandas
2024-01-03,5,0.480968,PyTorch
2024-01-04,9,0.380756,TensorFlow
2024-01-05,4,0.630398,matplotlib
2024-01-06,5,0.710537,Dash
2024-01-07,3,0.187097,colab
2024-01-08,5,0.689032,datetime
2024-01-09,7,0.51602,silly
2024-01-10,0,0.285548,LLM


In [238]:
combined = pd.concat([data, check_lib], axis=1)

In [239]:
combined

Unnamed: 0,Integer,Float,Library/Framework,Is It Library?
2024-01-01,8,0.478767,numpy,True
2024-01-02,5,0.929037,pandas,True
2024-01-03,5,0.480968,PyTorch,True
2024-01-04,9,0.380756,TensorFlow,True
2024-01-05,4,0.630398,matplotlib,True
2024-01-06,5,0.710537,Dash,True
2024-01-07,3,0.187097,colab,False
2024-01-08,5,0.689032,datetime,True
2024-01-09,7,0.51602,silly,False
2024-01-10,0,0.285548,LLM,False


In [240]:
data.isnull().sum()

Integer              0
Float                0
Library/Framework    0
dtype: int64

In [241]:
data["Integer"].replace(0, np.nan, inplace=True)
data

Unnamed: 0,Integer,Float,Library/Framework
2024-01-01,8.0,0.478767,numpy
2024-01-02,5.0,0.929037,pandas
2024-01-03,5.0,0.480968,PyTorch
2024-01-04,9.0,0.380756,TensorFlow
2024-01-05,4.0,0.630398,matplotlib
2024-01-06,5.0,0.710537,Dash
2024-01-07,3.0,0.187097,colab
2024-01-08,5.0,0.689032,datetime
2024-01-09,7.0,0.51602,silly
2024-01-10,,0.285548,LLM


In [242]:
data.fillna(data["Integer"].mean(), inplace=True)
data

Unnamed: 0,Integer,Float,Library/Framework
2024-01-01,8.0,0.478767,numpy
2024-01-02,5.0,0.929037,pandas
2024-01-03,5.0,0.480968,PyTorch
2024-01-04,9.0,0.380756,TensorFlow
2024-01-05,4.0,0.630398,matplotlib
2024-01-06,5.0,0.710537,Dash
2024-01-07,3.0,0.187097,colab
2024-01-08,5.0,0.689032,datetime
2024-01-09,7.0,0.51602,silly
2024-01-10,5.666667,0.285548,LLM


In [248]:
data["Integer"] =data["Integer"].astype("int")
data

Unnamed: 0,Integer,Float,Library/Framework
2024-01-01,8,0.478767,numpy
2024-01-02,5,0.929037,pandas
2024-01-03,5,0.480968,PyTorch
2024-01-04,9,0.380756,TensorFlow
2024-01-05,4,0.630398,matplotlib
2024-01-06,5,0.710537,Dash
2024-01-07,3,0.187097,colab
2024-01-08,5,0.689032,datetime
2024-01-09,7,0.51602,silly
2024-01-10,5,0.285548,LLM
