-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataprocessingofcaloriedata.py
55 lines (33 loc) · 1.31 KB
/
dataprocessingofcaloriedata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pandas as pd
import numpy as np
def _strip_units(column, *units):
    """Return *column* with each unit pattern removed, converted to numbers.

    Args:
        column: Series whose values may carry unit text.
        *units: Regex patterns removed in the given order (substring match,
            same semantics as ``Series.replace(..., regex=True)``).

    Returns:
        A new numeric Series; the input Series is not modified.

    Raises:
        ValueError: If a value is still non-numeric after stripping
            (``pd.to_numeric`` is called with its default strict mode).
    """
    cleaned = column
    for unit in units:
        # Non-inplace replace: chained ``df[col].replace(..., inplace=True)``
        # acts on a temporary object and does not reliably update the frame.
        cleaned = cleaned.replace(unit, "", regex=True)
    return pd.to_numeric(cleaned)


# Load the raw table from the working directory.
df = pd.read_csv('calorie_info.csv', encoding = 'unicode_escape')

# Notebook-style inspection. Only df.info() prints when this file is run as a
# plain script; the bare expressions display only in an interactive session.
df.shape
df.head(10)
df.info()
pd.isnull(df)
pd.isnull(df).sum()
df.describe()

# Strip the unit text and store the numeric result back into the frame.
# (Previously the pd.to_numeric results were computed and thrown away, so the
# columns were never converted.)
df["per_100_ml_or_gm"] = _strip_units(df["per_100_ml_or_gm"], "ml", "g")
df["cal_per_100_ml_or_gms"] = _strip_units(df["cal_per_100_ml_or_gms"], "cal")
df["kj_per_100_ml_or_gms"] = _strip_units(df["kj_per_100_ml_or_gms"], "kJ")
df["cal_per_serving"] = _strip_units(df["cal_per_serving"], "cal")
df["kj_per_serving"] = _strip_units(df["kj_per_serving"], "kJ")

# "per_ounces" is removed from the frame. The earlier attempt to normalise it
# first ("fl. oz. (30 ml)" -> "1") discarded its result, treated the
# parentheses as a regex group, and was followed by a strict pd.to_numeric
# that raises on any non-numeric text -- all for a column dropped right here.
df.drop("per_ounces", axis=1, inplace=True)

df["cal_per_ounces"] = _strip_units(df["cal_per_ounces"], "cal")
# Drop incomplete rows before the integer cast: int64 cannot represent NaN.
df.dropna(inplace=True)
# NOTE(review): astype truncates any fractional part -- confirm this column
# only holds whole numbers.
df["cal_per_ounces"] = df["cal_per_ounces"].astype(np.int64)

df["kj_per_ounces"] = _strip_units(df["kj_per_ounces"], "kJ")
df.head()
df.dropna(inplace=True)