<a href="https://colab.research.google.com/github/cozyrim/-Competition/blob/main/%EC%8B%9D%EB%8B%A8%EC%B6%94%EC%B2%9C%EC%95%8C%EA%B3%A0%EB%A6%AC%EC%A6%98%EA%B0%9C%EB%B0%9C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [3]:
df = pd.read_csv('/content/230705.csv', encoding='cp949')
diet_data = pd.read_csv('/content/20230105.csv', encoding='cp949')

In [6]:
# 필요한 특성 선택
selected_features = ['단백질(g)', '지방(g)', '탄수화물(g)', '에너지(kcal)']
diet_data_selected = df[selected_features]
diet_data_selected

Unnamed: 0,단백질(g),지방(g),탄수화물(g),에너지(kcal)
0,6.70,5.16,15.94,137
1,3.17,2.28,10.38,75
2,1.45,0.24,10.93,52
3,3.44,0.57,36.77,166
4,4.84,4.55,19.98,140
...,...,...,...,...
8298,1.30,1.68,6.54,43
8299,0.72,0.73,3.51,22
8300,1.07,0.38,4.86,23
8301,4.38,3.64,12.86,100


In [7]:
diet_data_selected.isnull().sum()

단백질(g)          0
지방(g)        6208
탄수화물(g)      6117
에너지(kcal)       0
dtype: int64

In [8]:
# 결측치 행 확인
diet_data_selected.loc[diet_data_selected['지방(g)'].isna()]

Unnamed: 0,단백질(g),지방(g),탄수화물(g),에너지(kcal)
482,5.81,,,148
483,6.22,,,185
484,4.94,,,167
485,6.00,,,211
486,5.00,,,209
...,...,...,...,...
7013,0.00,,,2
7014,0.00,,,0
7015,0.00,,,1
7016,0.47,,,43


In [9]:
# 결측치 모두 0.01으로 처리
diet_data_selected = diet_data_selected.fillna(0.01)

In [10]:
diet_data_selected.isnull().sum()

단백질(g)       0
지방(g)        0
탄수화물(g)      0
에너지(kcal)    0
dtype: int64

In [11]:
# 정규화
scaler = MinMaxScaler()
diet_data_normalized = scaler.fit_transform(diet_data_selected)
diet_data_normalized

# 정규화된 데이터로 데이터 프레임 생성
diet_data_normalized_df = pd.DataFrame(diet_data_normalized, columns=selected_features)
diet_data_normalized_df

Unnamed: 0,단백질(g),지방(g),탄수화물(g),에너지(kcal)
0,0.086889,0.085643,0.029503,0.169554
1,0.041110,0.037842,0.019212,0.092822
2,0.018804,0.003983,0.020230,0.064356
3,0.044612,0.009461,0.068057,0.205446
4,0.062767,0.075519,0.036981,0.173267
...,...,...,...,...
8298,0.016859,0.027884,0.012105,0.053218
8299,0.009337,0.012116,0.006497,0.027228
8300,0.013876,0.006307,0.008995,0.028465
8301,0.056802,0.060415,0.023802,0.123762


In [12]:
# 인덱스 재설정
diet_data_normalized_df = diet_data_normalized_df.reset_index(drop=True)
diet_data_normalized_df

Unnamed: 0,단백질(g),지방(g),탄수화물(g),에너지(kcal)
0,0.086889,0.085643,0.029503,0.169554
1,0.041110,0.037842,0.019212,0.092822
2,0.018804,0.003983,0.020230,0.064356
3,0.044612,0.009461,0.068057,0.205446
4,0.062767,0.075519,0.036981,0.173267
...,...,...,...,...
8298,0.016859,0.027884,0.012105,0.053218
8299,0.009337,0.012116,0.006497,0.027228
8300,0.013876,0.006307,0.008995,0.028465
8301,0.056802,0.060415,0.023802,0.123762


In [20]:
from sklearn.neighbors import NearestNeighbors
# 특성 벡터 데이터
X_train = diet_data_normalized_df.values

# 모델 설계
model = NearestNeighbors(n_neighbors=5, metric='euclidean')

# 모델 학습
model.fit(X_train)


In [25]:
# 새로운 식단 추천 방법1
new_diet = [2.0, 0.5, 0.4, 0.4]  # 예시로 새로운 식단의 특성 벡터를 설정합니다
new_diet_normalized = scaler.transform([new_diet])  # 새로운 식단을 정규화합니다

distances, indices = model.kneighbors(new_diet_normalized, n_neighbors=3)

# 추천된 식단 출력
recommended_diets = diet_data.iloc[indices[0]]
print(recommended_diets)

                     식품코드      식품명 데이터구분코드 데이터구분명  식품기원코드  \
7254  D304-185040000-0001   스프_소고기       D     음식       3   
162   D105-223240000-0001  미역국_소고기       D     음식       1   
8205  D705-223420000-0001   미역국_홍합       D     음식       7   

                    식품기원명  식품대분류코드   식품대분류명  대표식품코드 대표식품명  ...  포화지방산(g)  \
7254             외식(분석함량)        4  죽 및 스프류    4185    스프  ...      0.11   
162            가정식(분석 함량)        5   국 및 탕류    5223   미역국  ...      0.04   
8205  산업체급식(재료량 기반 산출 함량)        5   국 및 탕류    5223   미역국  ...      0.05   

     트랜스지방산(g)  출처코드               출처명   식품중량   업체명 데이터생성방법코드  데이터생성방법명  \
7254       0.0     1  농촌진흥청(국가표준식품성분표)   100g  해당없음         2        수집   
162        0.0     3          식품의약품안전처   400g  해당없음         1        분석   
8205       NaN     4             질병관리청  320ml  해당없음         3        산출   

         데이터생성일자     데이터기준일자  
7254  2018-12-31  2023-01-05  
162   2019-12-31  2023-01-05  
8205  2022-11-30  2023-01-05  

[3 rows x 49 columns]




# 아래부터는 시도했다가 잘 안된거라서
# 위에 셀 까지만 보시면 됩니다.

In [None]:
# 새로운 식단 추천 방법2 컴파일은 되지 않음

# 영양성분 비율 계산
protein_ratio = diet_data['단백질(g)'] / diet_data['에너지(kcal)']
carb_ratio = diet_data['탄수화물(g)'] / diet_data['에너지(kcal)']
fat_ratio = diet_data['지방(g)'] / diet_data['에너지(kcal)']

# 특성 벡터 생성
features = np.column_stack((protein_ratio, carb_ratio, fat_ratio))


In [14]:
has_zero = np.any(features == 0)
has_zero

True

In [15]:

has_zero = np.any(features == 0, axis=0)
has_zero

array([ True,  True,  True])

In [17]:
# 새로운 식단 데이터 정규화
new_diet = {
    '단백질(g)': 20,  # 예시: 단백질 값
    '탄수화물(g)': 80,  # 예시: 탄수화물 값
    '지방(g)': 10,  # 예시: 지방 값
    '에너지(kcal)': 300  # 예시: 칼로리 값
}
new_diet_normalized = scaler.transform([new_diet])  # 새로운 식단을 정규화합니다

distances, indices = model.kneighbors(new_diet_normalized, n_neighbors=3)

# 추천된 식단 출력
recommended_diets = diet_data.iloc[indices[0]]
print(recommended_diets)




TypeError: ignored

In [18]:
#새로운 식단 추천
new_protein_ratio = new_diet['단백질(g)'] / new_diet['에너지(kcal)']
new_carb_ratio = new_diet['탄수화물(g)'] / new_diet['에너지(kcal)']
new_fat_ratio = new_diet['지방(g)'] / new_diet['에너지(kcal)']

new_features = np.array([new_protein_ratio, new_carb_ratio, new_fat_ratio])

# 정규화
normalized_features = (new_features - np.min(features, axis=0)) / (np.max(features, axis=0) - np.min(features, axis=0))

# 정규화된 특성 벡터 출력
print("Normalized Features:")
print(normalized_features)

# 추천된 식단 찾기
distances = np.linalg.norm(features - normalized_features, axis=1)  # 특성 벡터 간 거리 계산
most_similar_index = np.argmin(distances)  # 가장 유사한 식단의 인덱스 찾기
recommended_diet = diet_data.iloc[most_similar_index]  # 추천된 식단 선택

# 추천된 식단 출력
print("Recommended Diet:")
print(recommended_diet)

Normalized Features:
[nan nan nan]
Recommended Diet:
식품코드             D101-004160000-0001
식품명                          국밥_돼지머리
데이터구분코드                            D
데이터구분명                            음식
식품기원코드                             1
식품기원명                     가정식(분석 함량)
식품대분류코드                            1
식품대분류명                            밥류
대표식품코드                          1004
대표식품명                             국밥
식품중분류코드                       100416
식품중분류명                          돼지머리
식품소분류코드                     10041600
식품소분류명                          해당없음
식품세분류코드                            0
식품세분류명                          해당없음
영양성분함량기준량                       100g
에너지(kcal)                        137
수분(g)                           71.6
단백질(g)                           6.7
지방(g)                           5.16
회분(g)                           0.63
탄수화물(g)                        15.94
당류(g)                           0.16
식이섬유(g)                          0.7
칼슘(mg)                