In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats


# 노트북 안에 그래프 그리기 위해
%matplotlib inline

# 그래프에서 격자로 숫자 범위가 눈에 잘 띄도록 gglot 스타일 사용
plt.style.use('ggplot')

# 그래프에서 마이너스 폰트 깨지는 문제에 대한 대처
mpl.rcParams['axes.unicode_minus'] = False

plt.rcParams['font.family'] = 'NanumGothic'

In [6]:
# 데이터 로드
df = pd.read_csv("data/SmartFarm 축사 환경 센싱 정보_20211028_114032.csv")
df.shape

(1717, 7)

In [7]:
# 데이터 컬럼 이름 지정
df.columns = ['Time','Collecting device ID', 'Barn number', 'Module ID', 'Type', 'Value', 'Sensor time']
df

Unnamed: 0,Time,Collecting device ID,Barn number,Module ID,Type,Value,Sensor time
0,2021-09-17 00:02:02,GW01,H01,1102,co2,544.000000,20210916150001
1,2021-09-17 00:02:02,GW01,H01,1102,nh3,1.322386,20210916150001
2,2021-09-17 00:02:30,GW01,H01,2002,humidity,79.452510,20210917000230
3,2021-09-17 00:02:52,GW01,H02,2001,humidity,78.162700,20210101000000
4,2021-09-17 00:02:30,GW01,H01,2002,temp,24.694910,20210917000230
...,...,...,...,...,...,...,...
1712,2021-09-17 23:57:02,GW01,H02,2001,temp,23.705080,20210101000000
1713,2021-09-17 23:55:26,GW01,H01,2000,temp,23.498310,20210101000000
1714,2021-09-17 23:55:26,GW01,H01,2000,humidity,82.140720,20210101000000
1715,2021-09-17 23:56:19,GW01,H01,1100,nh3,0.023635,20210101000000


In [8]:
data = df[['Time', 'Collecting device ID', 'Module ID', 'Type', 'Value']]

In [9]:
# pivot 사용으로 데이터 재구조화
pivoted = data.pivot(['Time','Collecting device ID','Module ID'], 'Type', 'Value')

#pivoted = data.pivot('Sensor time', 'Type', 'Vlaue')
pivoted = pivoted.reset_index()

In [10]:
pivoted

Type,Time,Collecting device ID,Module ID,co2,humidity,nh3,temp
0,2021-09-17 00:02:02,GW01,1102,544.0,,1.322386,
1,2021-09-17 00:02:30,GW01,2002,,79.45251,,24.69491
2,2021-09-17 00:02:52,GW01,2001,,78.16270,,
3,2021-09-17 00:04:54,GW01,2003,,69.83447,,24.99828
4,2021-09-17 00:05:17,GW01,2000,,82.27934,,24.49655
...,...,...,...,...,...,...,...
1004,2021-09-17 23:55:26,GW01,2000,,82.14072,,23.49831
1005,2021-09-17 23:56:19,GW01,1100,0.0,,0.023635,
1006,2021-09-17 23:56:55,GW01,2003,,,,23.66841
1007,2021-09-17 23:57:02,GW01,2001,,,,23.70508


In [11]:
grouped_df_mean = pivoted.groupby(pd.Grouper(key='Module ID')).count()

In [12]:
module_list = []

In [13]:
grouped_df_mean

Type,Time,Collecting device ID,co2,humidity,nh3,temp
Module ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1102,146,146,144,0,144,0
2002,143,143,0,141,0,141
2001,227,227,0,144,0,143
2003,202,202,0,143,0,143
2000,147,147,0,144,0,144
1100,144,144,143,0,143,0


In [14]:
#grouped_df_mean.replace(0, 'no data')
temp_humidity_condition = (grouped_df_mean.co2 == 0) | (grouped_df_mean.humidity == 0) | (grouped_df_mean.nh3 == 0) | (grouped_df_mean.temp == 0)
#grouped_df_mean[condition].index()

In [15]:
temp_humidity_condition = (grouped_df_mean.loc[2001, 'humidity'] != 0) | (grouped_df_mean.loc[2001, 'temp'] != 0)
nh3_co2_condition = (grouped_df_mean.loc[1101, 'nh3'] != 0) | (grouped_df_mean.loc[1101, 'co2'] != 0)

KeyError: 1101

In [None]:
temp_humidity_condition

In [None]:
grouped_df_mean

In [None]:
grouped_df_mean.loc[1102 ,'co2'] == 0

### 2021-12-17 (막곡 real_weight, analysis_weight Join)

In [114]:
import datetime
import pandas as pd
import numpy as np
# 이미지 분석 무게 데이터 로드
df_Image = pd.read_csv("data/weight_막곡_all.csv")
print(df_Image.shape)

# 실제 무게 데이터 로드
df_real = pd.read_csv("data/chickenweight_막곡_all.csv")
print(df_real.shape)

df_Image = df_Image[~df_Image.WEIGHT_PREDICTION_WEIGHT.isnull()]
df_real = df_real[~df_real.SENSOR_DATA.isnull()]

df_Image = df_Image[df_Image['WEIGHT_PREDICTION_WEIGHT'] >= 0]
df_real = df_real[df_real['SENSOR_DATA'] >= 0]

print(df_Image.shape)
print(df_real.shape)

(1140, 12)
(129382, 11)
(724, 12)
(71331, 11)


In [115]:
df_Image.sort_values(by=['CREATE_TIME'])
df_real.sort_values(by=['CREATE_TIME'])

df_Image['CREATE_TIME'] = pd.to_datetime(df_Image['CREATE_TIME'])
df_Image = df_Image.reset_index(drop=True)

df_real['CREATE_TIME'] = pd.to_datetime(df_real['CREATE_TIME'])
df_real = df_real.groupby(pd.Grouper(key='CREATE_TIME')).mean()

In [116]:
df_Image['SENSOR_DATA'] = 'null'

In [117]:
#df_Image['Senordata'] = df_real.loc[df_real.index[0]]
df_real

Unnamed: 0_level_0,MODULE_ID,SENSOR_DATA,MOVING_AVG,SENSOR_TIME,SENSOR_INFO
CREATE_TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-15 16:27:22,3003.0,42.791333,37.840667,2.021122e+13,0.0
2021-12-15 16:29:32,3004.0,360.582000,70.956000,2.021122e+13,0.0
2021-12-15 16:30:09,3003.0,374.888095,367.600714,2.021122e+13,0.0
2021-12-15 16:31:35,3004.0,45.312727,45.160909,2.021122e+13,0.0
2021-12-15 16:32:07,3003.0,51.387561,51.065366,2.021122e+13,0.0
...,...,...,...,...,...
2021-12-17 17:24:08,3006.0,0.736364,0.114545,2.021122e+13,0.0
2021-12-17 17:24:22,3004.0,0.160000,0.350000,2.021122e+13,0.0
2021-12-17 17:24:24,3002.0,224.491818,163.146364,2.021122e+13,0.0
2021-12-17 17:24:52,3001.0,7.925455,66.953636,2.021122e+13,0.0


In [119]:
import re

for i in range(0, len(df_Image)) :
#     if i >=1 :
#         break
    date_min = []
    for j in range(0, len(df_real)) :
#         if j > 10 :
#             break
        date_result = df_Image['CREATE_TIME'].iloc[i] -  df_real.index[j]
        if date_result.days==0 :
            
            date_split = str(date_result).split(' ')
            time = int(re.sub(":","",date_split[2]))
            date_min.append(time)
            
    #print("------------")
    date_index = date_min.index(min(date_min))
    date_index = df_real.index[date_index]
    #print(df_real.loc[date_index]['SENSOR_DATA'])
    df_Image['SENSOR_DATA'].iloc[i] = df_real.loc[date_index]['SENSOR_DATA']
    
        #hour = date_split[0][7] + date_split[0][8]
        #minute = date_split[1]


KeyboardInterrupt: 

In [None]:
df_Image

In [None]:
df_Image['SENSOR_DATA'].notnull().sum()

In [108]:
df_Image = pd.read_csv("data/weight_막곡_all.csv")
print(df_Image.shape)

# 실제 무게 데이터 로드
df_real = pd.read_csv("data/chickenweight_막곡_all.csv")
print(df_real.shape)


(1140, 12)
(129382, 11)


In [109]:
weight_df = pd.read_csv('data/chickenweight_막곡_all.csv')
pixel_df = pd.read_csv('data/weight_막곡_all.csv')

weight_df.head(2)
pixel_df.head(2)

Unnamed: 0,TID,CREATE_TIME,HOUSE_ID,MODULE_ID,DATA_TYPE,ORG_FILE_NAME,WEIGHT_PREDICTION_RESULT_FILE_NAME,WEIGHT_PREDICTION_COUNT,WEIGHT_PREDICTION_PIXEL_MEAN,WEIGHT_PREDICTION_WEIGHT,WEIGHT_PREDICTION_STATUS,SEND_TID
0,24bb28824215,2021-12-15 17:06:18,H02,"CT02,6",real,"H02_CT02,6_20211215170618_farm_image_real_24bb...",,,,,fail,f1c86b8841b0
1,52bdf392452c,2021-12-15 17:06:34,H03,"CT03,6",real,"H03_CT03,6_20211215170634_farm_image_real_52bd...",,,,,fail,24792c474fac


In [110]:
weight_df['CREATE_TIME'] = pd.to_datetime(weight_df.CREATE_TIME, format='%Y-%m-%d %H:%M:%S')
pixel_df['CREATE_TIME'] = pd.to_datetime(pixel_df.CREATE_TIME, format='%Y-%m-%d %H:%M:%S')

In [111]:
weight_df.sort_values('CREATE_TIME', inplace=True)
pixel_df.sort_values('CREATE_TIME', inplace=True)

In [351]:
weight_grp_df = weight_df.groupby(['CREATE_TIME','HOUSE_ID','MODULE_ID'],as_index=False)[['SENSOR_DATA']].mean()
#weight_grp_df[(weight_grp_df.CREATE_TIME >= '2021-12-17 16:37') & (weight_grp_df.HOUSE_ID == 'H01')]
#weight_grp_df[(weight_grp_df.CREATE_TIME >= '2021-12-17 16:47') & (weight_grp_df.HOUSE_ID == 'H01')]
# df_01 = pd.merge_asof(pixel_df[pixel_df.HOUSE_ID=='H01'].iloc[:,[1,2,3,4,5,6,7,8,9]], weight_grp_df[weight_grp_df.HOUSE_ID=='H01'], on="CREATE_TIME", direction="nearest")
# df_02 = pd.merge_asof(pixel_df[pixel_df.HOUSE_ID=='H02'].iloc[:,[1,2,3,4,5,6,7,8,9]], weight_grp_df[weight_grp_df.HOUSE_ID=='H02'], on="CREATE_TIME", direction="nearest")
# df_03 = pd.merge_asof(pixel_df[pixel_df.HOUSE_ID=='H03'].iloc[:,[1,2,3,4,5,6,7,8,9]], weight_grp_df[weight_grp_df.HOUSE_ID=='H03'], on="CREATE_TIME", direction="nearest")
# df_04 = pd.merge_asof(pixel_df[pixel_df.HOUSE_ID=='H04'].iloc[:,[1,2,3,4,5,6,7,8,9]], weight_grp_df[weight_grp_df.HOUSE_ID=='H04'], on="CREATE_TIME", direction="nearest")
#df_list = [df_01, df_02, df_03, df_04]
house_id = set(pixel_df.HOUSE_ID)
df_list = []

for i in range(1, len(house_id)+1):
    globals()['df_{}'.format(i)] = pd.merge_asof(pixel_df[pixel_df.HOUSE_ID=='H0{}'.format(i)].iloc[:,[1,2,3,4,5,6,7,8,9]], weight_grp_df[weight_grp_df.HOUSE_ID=='H0{}'.format(i)], on="CREATE_TIME", direction="nearest")
    df_list.append(globals()['df_{}'.format(i)])
    
df_total = pd.concat(df_list, ignore_index=True)


df_total = df_total[['CREATE_TIME','ORG_FILE_NAME', 'WEIGHT_PREDICTION_PIXEL_MEAN','WEIGHT_PREDICTION_WEIGHT','SENSOR_DATA']]

In [352]:
df_total

Unnamed: 0,CREATE_TIME,ORG_FILE_NAME,WEIGHT_PREDICTION_PIXEL_MEAN,WEIGHT_PREDICTION_WEIGHT,SENSOR_DATA
0,2021-12-15 17:06:59,"H01_CT01,6_20211215170659_farm_image_real_3d2d...",,,107.799091
1,2021-12-15 17:16:33,"H01_CT01,6_20211215171633_farm_image_real_8ab0...",,,2.677273
2,2021-12-15 17:26:38,"H01_CT01,6_20211215172638_farm_image_real_137e...",,,6.211818
3,2021-12-15 17:36:21,"H01_CT01,6_20211215173621_farm_image_real_6df4...",,,6.921818
4,2021-12-15 17:46:45,"H01_CT01,6_20211215174645_farm_image_real_c382...",,,4.122727
...,...,...,...,...,...
1135,2021-12-17 16:36:43,"H04_CT04,6_20211217163643_farm_image_real_518b...","[2144,2325,2176,2102,2039]",103.8,0.372727
1136,2021-12-17 16:46:46,"H04_CT04,6_20211217164646_farm_image_real_7748...",[2347],113.6,-0.732727
1137,2021-12-17 16:56:48,"H04_CT04,6_20211217165648_farm_image_real_34e2...","[2048,1891]",94.2,1.173636
1138,2021-12-17 17:06:45,"H04_CT04,6_20211217170645_farm_image_real_c102...","[2158,2187,2174]",104.7,-0.400909


In [309]:
df_total.to_csv('real_image_weight_compare.csv', encoding='utf-8')

In [350]:
df_total

Unnamed: 0,CREATE_TIME,HOUSE_ID_x,MODULE_ID,DATA_TYPE,ORG_FILE_NAME,WEIGHT_PREDICTION_RESULT_FILE_NAME,WEIGHT_PREDICTION_COUNT,WEIGHT_PREDICTION_PIXEL_MEAN,WEIGHT_PREDICTION_WEIGHT,HOUSE_ID_y,SENSOR_DATA
0,2021-12-15 17:06:59,H01,"CT01,6",real,"H01_CT01,6_20211215170659_farm_image_real_3d2d...",,,,,H01,107.799091
1,2021-12-15 17:16:33,H01,"CT01,6",real,"H01_CT01,6_20211215171633_farm_image_real_8ab0...",,,,,H01,2.677273
2,2021-12-15 17:26:38,H01,"CT01,6",real,"H01_CT01,6_20211215172638_farm_image_real_137e...",,,,,H01,6.211818
3,2021-12-15 17:36:21,H01,"CT01,6",real,"H01_CT01,6_20211215173621_farm_image_real_6df4...",,,,,H01,6.921818
4,2021-12-15 17:46:45,H01,"CT01,6",real,"H01_CT01,6_20211215174645_farm_image_real_c382...",,,,,H01,4.122727
...,...,...,...,...,...,...,...,...,...,...,...
1135,2021-12-17 16:36:43,H04,"CT04,6",real,"H04_CT04,6_20211217163643_farm_image_real_518b...","H04_CT04,6_20211217163643_farm_image_real_518b...",5.0,"[2144,2325,2176,2102,2039]",103.8,H04,0.372727
1136,2021-12-17 16:46:46,H04,"CT04,6",real,"H04_CT04,6_20211217164646_farm_image_real_7748...","H04_CT04,6_20211217164646_farm_image_real_7748...",1.0,[2347],113.6,H04,-0.732727
1137,2021-12-17 16:56:48,H04,"CT04,6",real,"H04_CT04,6_20211217165648_farm_image_real_34e2...","H04_CT04,6_20211217165648_farm_image_real_34e2...",2.0,"[2048,1891]",94.2,H04,1.173636
1138,2021-12-17 17:06:45,H04,"CT04,6",real,"H04_CT04,6_20211217170645_farm_image_real_c102...","H04_CT04,6_20211217170645_farm_image_real_c102...",3.0,"[2158,2187,2174]",104.7,H04,-0.400909


In [323]:
weight_grp_df = weight_df.groupby(['CREATE_TIME','HOUSE_ID'],as_index=False)[['SENSOR_DATA']].mean()
df_list = []
for i in range(0, len(pixel_df)):
    if i == len(pixel_df) -1 :
        break
    #print(pixel_df['CREATE_TIME'].iloc[i])
    #print(pixel_df['CREATE_TIME'].iloc[i+1])
    #print("---------------------------")
    #weight_grp_df[(weight_grp_df.CREATE_TIME >= pixel_df['CREATE_TIME'][i]) & (weight_grp_df.HOUSE_ID == 'H01')]
    #weight_grp_df[(weight_grp_df.CREATE_TIME >= pixel_df['CREATE_TIME'][i+1]) & (weight_grp_df.HOUSE_ID == 'H01')]
    condition = (weight_grp_df.HOUSE_ID=='H01') & (weight_grp_df.CREATE_TIME >= pixel_df['CREATE_TIME'][i]) & (weight_grp_df.CREATE_TIME >= pixel_df['CREATE_TIME'][i+1])

    new_df = pd.merge_asof(pixel_df[pixel_df.HOUSE_ID=='H01'].iloc[:,[1,2,3,4,5,6,7,8,9]], weight_grp_df[condition], on="CREATE_TIME", direction="nearest")
    new_df = new_df[new_df.SENSOR_DATA.notna()]
    df_list.append(new_df)

df_all = pd.concat(df_list, ignore_index = True)

df_all = df_all[['CREATE_TIME','ORG_FILE_NAME', 'WEIGHT_PREDICTION_PIXEL_MEAN','WEIGHT_PREDICTION_WEIGHT','SENSOR_DATA']]
df_all = df_all[~df_all.WEIGHT_PREDICTION_WEIGHT.isnull()]

#df = df[df['SENSOR_DATA'] >= 0]
#df_H01 = df.reset_index(drop = True)
    

In [324]:
df_all.tail(1000)

Unnamed: 0,CREATE_TIME,ORG_FILE_NAME,WEIGHT_PREDICTION_PIXEL_MEAN,WEIGHT_PREDICTION_WEIGHT,SENSOR_DATA
319677,2021-12-16 22:46:43,"H01_CT01,6_20211216224643_farm_image_real_9570...","[2272,2101,2590,2035,2005]",106.1,0.731818
319678,2021-12-16 22:56:41,"H01_CT01,6_20211216225641_farm_image_real_6f51...","[2283,2567,2209,2240,2228,2202,2382,2356,2567]",113.1,0.731818
319679,2021-12-16 23:06:34,"H01_CT01,6_20211216230634_farm_image_real_0aa7...","[2493,2330,2256]",114.3,0.731818
319680,2021-12-16 23:17:14,"H01_CT01,6_20211216231714_farm_image_real_66f1...","[2282,2089,2107]",103.9,0.731818
319681,2021-12-16 23:26:46,"H01_CT01,6_20211216232646_farm_image_real_fb16...",[2469],119.9,0.731818
...,...,...,...,...,...
321192,2021-12-17 16:27:18,"H01_CT01,6_20211217162718_farm_image_real_631f...","[2290,2301,2169,1879,2088,1939,2307,2443,2146,...",104.5,223.090000
321193,2021-12-17 16:37:45,"H01_CT01,6_20211217163745_farm_image_real_9a44...","[3154,2457,2837,2483,2682,2694]",132.7,223.090000
321194,2021-12-17 16:47:17,"H01_CT01,6_20211217164717_farm_image_real_cafe...","[2631,2619,2520,2701,2187,2247]",120.6,223.090000
321195,2021-12-17 16:58:02,"H01_CT01,6_20211217165802_farm_image_real_11e7...","[2804,2562,2785]",132.6,223.090000
