In [79]:
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler # 출력을 스트리밍하는 데 사용
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

In [98]:
import pandas as pd
df = pd.read_csv('sample.csv')

In [99]:
df

Unnamed: 0.1,Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,0,2015-05-18,2.392737,2.394751,2.444257,2.392737,2.392737,0.024751,0.024751,0.024751,0.000021,0.000021,0.000021,0.024730,0.024730,0.024730,0.0,0.0,0.0,2.417488
1,1,2016-05-24,2.391958,2.398808,2.448622,2.391958,2.391958,0.030735,0.030735,0.030735,0.001858,0.001858,0.001858,0.028877,0.028877,0.028877,0.0,0.0,0.0,2.422692
2,2,2016-07-11,2.391857,2.404601,2.453521,2.391857,2.391857,0.037685,0.037685,0.037685,0.000021,0.000021,0.000021,0.037665,0.037665,0.037665,0.0,0.0,0.0,2.429542
3,3,2017-02-24,2.391379,2.345960,2.397159,2.391379,2.391379,-0.019575,-0.019575,-0.019575,0.008341,0.008341,0.008341,-0.027916,-0.027916,-0.027916,0.0,0.0,0.0,2.371804
4,4,2017-03-08,2.391354,2.352625,2.401805,2.391354,2.391354,-0.015143,-0.015143,-0.015143,-0.002977,-0.002977,-0.002977,-0.012166,-0.012166,-0.012166,0.0,0.0,0.0,2.376211
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,187,2023-09-21,2.428369,2.392767,2.441506,2.428369,2.428369,-0.011456,-0.011456,-0.011456,-0.007423,-0.007423,-0.007423,-0.004033,-0.004033,-0.004033,0.0,0.0,0.0,2.416913
188,188,2023-10-03,2.428606,2.387403,2.437426,2.428606,2.428606,-0.015404,-0.015404,-0.015404,0.001858,0.001858,0.001858,-0.017262,-0.017262,-0.017262,0.0,0.0,0.0,2.413202
189,189,2023-10-15,2.428844,2.384049,2.435007,2.428844,2.428844,-0.019585,-0.019585,-0.019585,0.001074,0.001074,0.001074,-0.020659,-0.020659,-0.020659,0.0,0.0,0.0,2.409259
190,190,2023-10-27,2.429081,2.388843,2.439615,2.429081,2.429081,-0.014665,-0.014665,-0.014665,0.008341,0.008341,0.008341,-0.023006,-0.023006,-0.023006,0.0,0.0,0.0,2.414416


In [100]:

def ts_analysis(df):
    # 날짜 컬럼을 datetime 형식으로 변환
    df['ds'] = pd.to_datetime(df['ds'])

    # 최대값과 최소값의 일자 찾기
    max_date = df[df['yhat'] == df['yhat'].max()]['ds'].iloc[0]
    min_date = df[df['yhat'] == df['yhat'].min()]['ds'].iloc[0]

    # 계절을 구분하기 위한 함수 정의
    def get_season(month):
        if month in [3, 4, 5]:
            return 'Spring'
        elif month in [6, 7, 8]:
            return 'Summer'
        elif month in [9, 10, 11]:
            return 'Fall'
        else:
            return 'Winter'

    # 월별 계절 할당
    df['season'] = df['ds'].dt.month.apply(get_season)

    # 계절별 평균값 계산
    seasonal_trend = df.groupby('season')['yhat'].mean()

    # 매월 평균 계산
    monthly_avg = df.groupby(df['ds'].dt.month)['yhat'].mean()

    # 전체 기간에 대한 평균 yhat 값
    overall_avg = df['yhat'].mean()

    # 계절별 평균값을 전체 평균값으로 나누어 상대적인 비율 계산
    seasonal_relative = seasonal_trend / overall_avg

    # 매년 평균값 계산
    annual_avg = df.groupby(df['ds'].dt.year)['yhat'].mean()

    # 매년 평균값을 전체 평균값으로 나누어 상대적인 비율 계산
    annual_relative = annual_avg / overall_avg
    annual_relative = annual_relative.reset_index()

    # 매월 평균값을 전체 평균값으로 나누어 상대적인 비율 계산
    monthly_relative = monthly_avg / overall_avg

    return seasonal_relative,annual_relative,monthly_relative, max_date, min_date, seasonal_trend

In [101]:
seasonal_relative,annual_relative_1,monthly_relative, max_date, min_date, seasonal_trend=ts_analysis(df)

In [102]:
len(df)

192

In [103]:
def annual_relative(df):
    string = []
    for i in range(len(df)):
        prompt = str(df['ds'][0]) + ":" + str(df['yhat'][i])
        string.append(prompt)
    return string

In [104]:
annual_relative_1

Unnamed: 0,ds,yhat
0,2015,1.002192
1,2016,1.005769
2,2017,0.993119
3,2018,0.99405
4,2019,0.998342
5,2020,1.001737
6,2021,1.002341
7,2022,1.003716
8,2023,1.004713


In [105]:
string = annual_relative(annual_relative_1)

0
1
2
3
4
5
6
7
8


In [63]:
type(string)

str

In [64]:
string

'2015-05-18:2.417487854291525,2016-05-24:2.422692414562317,2016-07-11:2.429542145024259,2017-02-24:2.371804082222587,2017-03-08:2.3762110053408945,2017-03-20:2.3821689688985392,2017-04-01:2.376451752351503,2017-04-13:2.3874412751062244,2017-04-25:2.410274837695942,2017-05-07:2.409644981044805,2017-05-19:2.4262380478012147,2017-05-31:2.4148289868517487,2017-06-12:2.4049548142150323,2017-06-24:2.413571486790609,2017-07-06:2.4235261899813807,2017-07-18:2.4242730411591724,2017-07-30:2.410307819310358,2017-08-11:2.416383938917784,2017-08-23:2.405599962415613,2017-09-04:2.406110479268253,2017-09-16:2.3944104004948032,2017-09-28:2.3723449139851107,2017-10-10:2.3752223251417464,2017-10-22:2.372595322785708,2017-11-03:2.3790143475311663,2017-11-15:2.3755661947328286,2017-12-09:2.3856495824157875,2017-12-21:2.375478102203137,2018-01-02:2.3746269423790896,2018-01-14:2.372757562415876,2018-01-26:2.395651548213723,2018-02-07:2.3820923047851434,2018-06-19:2.412704608193185,2018-07-01:2.4310560551307

In [106]:
template = """
You are an analyst who looks at and analyzes the vegetation index.

{VegetationIndex}-seasonal_relative : 
[
spring:{df1_1},
summer:{df1_2},
autumn:{df1_3},
winter:{df1_4},
],
{VegetationIndex}-annual_relative : 
[
{annual_relative}
],
{VegetationIndex} - monthly_relative :
[
'January':{df2_1},
'February':{df2_2},
'March':{df2_3},
'April':{df2_4},
'May':{df2_5},
'June':{df2_6},
'July':{df2_7},
'August':{df2_8},
'September':{df2_9},
'October':{df2_10},
'November':{df2_11},
'December':{df2_12}
],
The day when the vegetation index was highest was {max}, and the day when the vegetation index was lowest was {min},
"""

prompt = PromptTemplate(template=template, input_variables=["VegetationIndex","df1_1","df1_2","df1_3","df1_4","df2_1","df2_2","df2_3","df2_4","df2_5","df2_6","df2_7","df2_8","df2_9","df2_10","df2_11","df2_12","min","max"])

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
                model_path="firefly-llama2-13b-chat.Q4_K_M.gguf",
                input={"temperature": 0.75,
                       "max_length": 4000,
                       "top_p": 1},
                callback_manager=callback_manager,
                n_ctx=2048
                )

agent = llm_chain = LLMChain(prompt=prompt, llm=llm)

                input was transferred to model_kwargs.
                Please confirm that input is what you intended.
AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [108]:
seasonal_relative

season
Fall      0.995523
Spring    1.001493
Summer    1.008350
Winter    0.993316
Name: yhat, dtype: float64

In [107]:
# 입력 변수에 대한 값 설정
input_values = {
    "VegetationIndex": "Radar Vegetation Index",
    "df1_1": seasonal_relative[1],
    "df1_2": seasonal_relative[2],
    "df1_3": seasonal_relative[0],
    "df1_4": seasonal_relative[3],
    "df2_1": "85%",
    "df2_2": "86%",
    # ... 나머지 변수들에 대한 값도 마찬가지로 설정 ...
    "df2_12": "87%",
    "min": "1st January",
    "max": "31st July"
}

# LLMChain 실행
text = agent.run(input_values)

ValueError: A single string input was passed in, but this chain expects multiple inputs ({'df2_10', 'df2_6', 'df1_2', 'max', 'df2_2', 'min', 'df2_12', 'VegetationIndex', 'df2_4', 'df1_1', 'df2_8', 'df2_9', 'df2_5', 'df2_7', 'annual_relative', 'df2_3', 'df1_3', 'df2_1', 'df1_4', 'df2_11'}). When a chain expects multiple inputs, please call it by passing in a dictionary, eg `chain({'foo': 1, 'bar': 2})`

In [34]:
text    

' and provide an analysis of the vegetation index trends over time.\n\nReport:\n\nThe vegetation index is an important indicator of ecosystem health and productivity. It provides valuable insights into the extent and distribution of vegetation cover, which is crucial for monitoring land use change, climate variability, and biodiversity conservation. The vegetation index measures the amount of photosynthetically active plant biomass per unit area of land surface, using reflectance measurements from satellites or ground-based sensors.\n\nBased on our analysis, the vegetation index trends over time show a consistent increase in spring, summer, and autumn, followed by a slight decrease in winter. The average vegetation index values for each season are as follows: 99 percent in spring, 101 percent in summer, 102 percent in autumn, and 98 percent in winter.\n\nThese trends suggest that the vegetation cover has been increasing over time, indicating improved ecosystem health and productivity. 

In [35]:
import re

# Use regular expression to replace all newline characters with nothing
cleaned_text = re.sub('\n', '', text)

print(cleaned_text)


 and provide an analysis of the vegetation index trends over time.Report:The vegetation index is an important indicator of ecosystem health and productivity. It provides valuable insights into the extent and distribution of vegetation cover, which is crucial for monitoring land use change, climate variability, and biodiversity conservation. The vegetation index measures the amount of photosynthetically active plant biomass per unit area of land surface, using reflectance measurements from satellites or ground-based sensors.Based on our analysis, the vegetation index trends over time show a consistent increase in spring, summer, and autumn, followed by a slight decrease in winter. The average vegetation index values for each season are as follows: 99 percent in spring, 101 percent in summer, 102 percent in autumn, and 98 percent in winter.These trends suggest that the vegetation cover has been increasing over time, indicating improved ecosystem health and productivity. However, the slig

In [36]:
cleaned_text[1:]

'and provide an analysis of the vegetation index trends over time.Report:The vegetation index is an important indicator of ecosystem health and productivity. It provides valuable insights into the extent and distribution of vegetation cover, which is crucial for monitoring land use change, climate variability, and biodiversity conservation. The vegetation index measures the amount of photosynthetically active plant biomass per unit area of land surface, using reflectance measurements from satellites or ground-based sensors.Based on our analysis, the vegetation index trends over time show a consistent increase in spring, summer, and autumn, followed by a slight decrease in winter. The average vegetation index values for each season are as follows: 99 percent in spring, 101 percent in summer, 102 percent in autumn, and 98 percent in winter.These trends suggest that the vegetation cover has been increasing over time, indicating improved ecosystem health and productivity. However, the slig

In [37]:
import sys
sys.executable

'c:\\Users\\gunwo\\.conda\\envs\\llm\\python.exe'

In [38]:
import googletrans

translator = googletrans.Translator()

result1 = translator.translate(cleaned_text, dest='ko')

print(result1)

Translated(src=en, dest=ko, text=시간이 지남에 따라 식생 지수 트렌드에 대한 분석을 제공합니다.보고 : 식생 지수는 생태계 건강과 생산성의 중요한 지표입니다.그것은 토지 이용 변화, 기후 변동성 및 생물 다양성 보존을 모니터링하는 데 중요합니다.식생 지수는 위성 또는지면 기반 센서의 반사 측정을 사용하여 지표면의 단위 면적당 광합성 활성 식물 바이오 매스의 양을 측정합니다. 분석에 따라 시간이 지남에 따른 식생 지수 트렌드는 봄, 여름, 및 및 봄, 여름, 및 일관된 증가를 보여줍니다.가을, 겨울에는 약간 감소합니다.각 계절의 평균 초목 지수 값은 다음과 같습니다. 봄의 99 %, 여름의 101 %, 가을의 102 %, 겨울의 98 %.이 추세는 초목 커버가 시간이 지남에 따라 증가하고 있음을 나타냅니다.그리고 생산성.그러나 겨울의 약간의 감소는이 기간 동안 식생 커버에 영향을 미치는 초목 성장 또는 토지 이용 관행의 계절적 패턴을 나타낼 수 있습니다., pronunciation=sigan-i jinam-e ttala sigsaeng jisu teulendeue daehan bunseog-eul jegonghabnida.bogo : sigsaeng jisuneun saengtaegye geongang-gwa saengsanseong-ui jung-yohan jipyoibnida. geugeos-eun toji iyong byeonhwa, gihu byeondongseong mich saengmul dayangseong bojon-eul moniteolinghaneun de jung-yohabnida. sigsaeng jisuneun wiseong ttoneunjimyeon giban senseoui bansa cheugjeong-eul sayonghayeo jipyomyeon-ui dan-wi myeonjeogdang gwanghabseong hwalseong sigmul baio maeseuui yang-eul cheugjeongh

In [5]:
import pandas as pd
import numpy as np

# 난수 발생을 위한 시드 설정 (시드를 설정하면 동일한 결과를 얻을 수 있습니다)
np.random.seed(42)

# 1월부터 12월까지의 월을 나타내는 리스트
months = [f"{i}월" for i in range(1, 13)]

# 데이터프레임 생성
data = {
    '월': months,
    '데이터': np.random.rand(12)  # 0에서 1 사이의 랜덤 값으로 채움
}

df = pd.DataFrame(data)

# 결과 출력
print(df)

      월       데이터
0    1월  0.374540
1    2월  0.950714
2    3월  0.731994
3    4월  0.598658
4    5월  0.156019
5    6월  0.155995
6    7월  0.058084
7    8월  0.866176
8    9월  0.601115
9   10월  0.708073
10  11월  0.020584
11  12월  0.969910


In [None]:
def year_trend(df):
    for i in df.columns:
        return 'It is' +i+ 'percent compared to the '+i+' average'

In [17]:
period(df)

('January', 'December')