In [None]:
import pandas as pd

# Read the raw source dataset from parquet and preview its first rows.
source_path = '/teamspace/studios/this_studio/nlp-hr-feedback/feature_pipeline/feature_pipeline/datasets/datasource.parquet'
df = pd.read_parquet(path=source_path)
df.head()

In [None]:
# Print a summary of the raw frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Split the raw frame into four column groups; each group is later written
# to its own parquet file.
job_detail_cols = ['Title', 'Place', 'Job_type', 'Department']
job_evaluation_cols = ['Overall_rating', 'work_satisfaction', 'Likes', 'Dislikes']
work_life_cols = ['work_life_balance', 'career_growth']
financial_aspect_cols = ['skill_development', 'salary_and_benefits', 'job_security']

job_details_df = df[job_detail_cols]                # group 1: job details
job_evaluation_df = df[job_evaluation_cols]         # group 2: job evaluation metrics
work_life_df = df[work_life_cols]                   # group 3: work-life considerations
financial_aspects_df = df[financial_aspect_cols]    # group 4: skill and financial aspects


In [None]:
# Build one synthetic daily timestamp per source row: a date range that ends
# at the current time and extends one day back per row.
row_count = len(df)
daily_range = pd.date_range(end=pd.Timestamp.now(), periods=row_count, freq='D')
# index=False turns the range into a plain column instead of the frame's index.
timestamps = daily_range.to_frame(name="event_timestamp", index=False)

In [None]:
# Attach the shared `event_timestamp` column to every feature group.
#
# The values are assigned by position (plain NumPy array) instead of with
# pd.concat(axis=1): concat pairs rows by index label, so if `df` was loaded
# with anything other than a default 0..n-1 index, the groups and `timestamps`
# would not line up and NaN-filled rows would be produced. Using .assign also
# keeps the cell idempotent: re-running it overwrites the column rather than
# appending a duplicate one.
_event_ts = timestamps["event_timestamp"].to_numpy()

job_details_df = job_details_df.assign(event_timestamp=_event_ts)
job_evaluation_df = job_evaluation_df.assign(event_timestamp=_event_ts)
work_life_df = work_life_df.assign(event_timestamp=_event_ts)
financial_aspects_df = financial_aspects_df.assign(event_timestamp=_event_ts)

In [None]:
# Surrogate integer key, 0..n-1, one value per source row.
# NOTE(review): the name `patient_ids` looks like a leftover from another
# project; it is kept unchanged in case other cells reference it.
patient_ids = pd.DataFrame(data=list(range(len(df))), columns=["id"])

# Assign the ids by position (plain NumPy array) instead of pd.concat(axis=1).
# concat pairs rows by index label, so a non-default index on the feature
# groups would misalign them with `patient_ids` and introduce NaN ids (which
# also turns the integer column into floats). .assign additionally makes the
# cell safe to re-run: the `id` column is overwritten, not duplicated.
_id_values = patient_ids["id"].to_numpy()

job_details_df = job_details_df.assign(id=_id_values)
job_evaluation_df = job_evaluation_df.assign(id=_id_values)
work_life_df = work_life_df.assign(id=_id_values)
financial_aspects_df = financial_aspects_df.assign(id=_id_values)

In [None]:
# Preview the job-details group to check the appended columns.
job_details_df.head()

In [None]:
# Persist every feature group as its own parquet file under `data_path`.
data_path = "/teamspace/studios/this_studio/nlp-hr-feedback/feature_pipeline/feature_pipeline/data_store/feature_repo/data"

# Destination file -> frame to write there (written in this order).
_parquet_targets = {
    f"{data_path}/job_details.parquet": job_details_df,
    f"{data_path}/job_evaluation.parquet": job_evaluation_df,
    f"{data_path}/work_life.parquet": work_life_df,
    f"{data_path}/financial_aspects.parquet": financial_aspects_df,
}
for _target, _frame in _parquet_targets.items():
    _frame.to_parquet(path=_target)

In [None]:
# Summary (columns, dtypes, non-null counts) of the job-details group.
job_details_df.info()

In [None]:
# Summary (columns, dtypes, non-null counts) of the job-evaluation group.
job_evaluation_df.info()

In [None]:
# Summary (columns, dtypes, non-null counts) of the work-life group.
work_life_df.info()

In [None]:
# Summary (columns, dtypes, non-null counts) of the financial-aspects group.
financial_aspects_df.info()

# API

In [None]:
import pandas as pd
from feast import FeatureStore

# Open the Feast feature store whose repository lives at `repo_path`.
# The functions defined below read this module-level `store`.
store = FeatureStore(repo_path="/teamspace/studios/this_studio/nlp-hr-feedback/feature_pipeline/feature_pipeline/data_store/feature_repo")

def load_data(path=None, columns=None):
    """
    Read the parquet file that serves as the entity DataFrame.

    Args:
        path: Parquet file to read. Defaults to the financial-aspects file
            inside the feature repo's data directory.
        columns: Optional list of column names to load. ``None`` (the
            default) loads every column in the file.

    Returns:
        pandas.DataFrame with the requested columns.
    """
    if path is None:
        path = "/teamspace/studios/this_studio/nlp-hr-feedback/feature_pipeline/feature_pipeline/data_store/feature_repo/data/financial_aspects.parquet"
    # No column pruning happens unless the caller passes `columns`; whatever
    # is loaded here is handed to the feature store as the entity frame.
    return pd.read_parquet(path=path, columns=columns)

def get_data(entity_df):
    """
    Retrieve historical `job_details` features for the given entity rows.

    Uses the module-level Feast `store` and materialises the retrieval
    result as a DataFrame.

    Args:
        entity_df: Entity DataFrame passed to `store.get_historical_features`.

    Returns:
        The result of calling `.to_df()` on the retrieval job.
    """
    feature_refs = [
        "job_details:Title",
        "job_details:Place",
        "job_details:Job_type",
    ]
    retrieval_job = store.get_historical_features(
        entity_df=entity_df,
        features=feature_refs,
    )
    return retrieval_job.to_df()

def display_first_100_rows(dataframe):
    """
    Print a plain-text preview of `dataframe`, limited to its first 100 rows.
    """
    preview = dataframe.head(100)
    print(preview)

# Read the entity rows (load_data reads the financial-aspects parquet file)
entity_df = load_data()

# Retrieve the job_details features for those entity rows from the feature store
data = get_data(entity_df)

# Print a preview of the retrieved features (at most 100 rows)
display_first_100_rows(data)


In [8]:
import pandas as pd
from feast import FeatureStore


# Historical retrieval through the "moodlens_features" feature service,
# restricted to the first 100 entity rows.
store = FeatureStore(
    repo_path="/teamspace/studios/this_studio/nlp-hr-feedback/feature_pipeline/feature_pipeline/data_store/feature_repo"
)

entity_df = pd.read_parquet(
    path="/teamspace/studios/this_studio/nlp-hr-feedback/feature_pipeline/feature_pipeline/data_store/feature_repo/data/entity.parquet"
).head(100)

feature_service = store.get_feature_service("moodlens_features")
training_data = store.get_historical_features(
    entity_df=entity_df,
    features=feature_service,
)

# NOTE: this rebinds `df`, which an earlier cell uses for the raw dataset.
df = training_data.to_df()
df.head()


# def get_data():
#     """
#     Fetch historical features using Feast and return them as a dictionary.
#     """
#     training_data = store.get_historical_features(
#         entity_df=entity_df,
#         features=[
#             "job_details:Title",
#             "job_details:Place",
#             "job_details:Job_type",
#         ]
#     )

#     df = training_data.to_df()
#     data = df.to_dict(orient="records")  
#     return data


# data = get_data()
# data



Unnamed: 0,event_timestamp,id,Overall_rating,work_satisfaction,Likes,Dislikes,skill_development,salary_and_benefits,job_security
0,1959-07-17 15:50:40.751697+00:00,0.0,1.0,1.0,Only skill development is good.,"working culture,behavior, work life balance,jo...",5.0,5.0,1.0
1,1959-07-18 15:50:40.751697+00:00,1.0,1.0,5.0,Nothing,Company is good but company owners are treatin...,1.0,5.0,1.0
2,1959-07-19 15:50:40.751697+00:00,2.0,1.0,1.0,Best company,Work life\nNo business ethics \nNot career ori...,1.0,5.0,3.0
3,1959-07-20 15:50:40.751697+00:00,3.0,4.0,3.0,Hr policy is very clear and skill oriented . N...,Reference basis appraisals done in current sce...,3.0,5.0,2.0
4,1959-07-21 15:50:40.751697+00:00,4.0,4.0,4.0,Culture\nWork life balance\nSalary and perks\n...,Internal Politics\nUnfair division of business...,4.0,5.0,4.0


In [6]:
# Preview the entity rows (the rendered output shows event_timestamp and id).
entity_df.head()

Unnamed: 0,event_timestamp,id
0,1959-07-17 15:50:40.751697,0.0
1,1959-07-18 15:50:40.751697,1.0
2,1959-07-19 15:50:40.751697,2.0
3,1959-07-20 15:50:40.751697,3.0
4,1959-07-21 15:50:40.751697,4.0


In [5]:
# `data` is produced by get_data(), which already returns the result of
# `.to_df()`; calling `.to_df()` on that DataFrame again raises AttributeError
# when the notebook is run top to bottom. Only materialise when `data` is
# still an object that exposes `.to_df()` (e.g. a retrieval job left over
# from an interactive session).
d = data.to_df() if hasattr(data, "to_df") else data
d.head()

In [None]:
# Number of rows in the materialised frame `d`.
len(d)

In [3]:
import requests
import time
# Stream the local test endpoint and print each received line, pausing one
# second between lines.
#
# The request is wrapped in a context manager because a streamed response
# keeps its connection open until the body is fully consumed or the response
# is closed; interrupting the loop (e.g. with KeyboardInterrupt) would
# otherwise leave the connection open.
with requests.get('http://localhost:8888/test', stream=True) as response:
    # Fail loudly on 4xx/5xx instead of printing an error page as if it were data.
    response.raise_for_status()
    for r in response.iter_lines():
        time.sleep(1)
        print(r)

b'loan_id, no_of_dependents, education, self_employed, income_annum, loan_amount, loan_term, cibil_score, residential_assets_value, commercial_assets_value, luxury_assets_value, bank_asset_value, loan_status'
b'1,2, Graduate, No,9600000,29900000,12,778,2400000,17600000,22700000,8000000, Approved'
b'2,0, Not Graduate, Yes,4100000,12200000,8,417,2700000,2200000,8800000,3300000, Rejected'
b'3,3, Graduate, No,9100000,29700000,20,506,7100000,4500000,33300000,12800000, Rejected'
b'4,3, Graduate, No,8200000,30700000,8,467,18200000,3300000,23300000,7900000, Rejected'
b'5,5, Not Graduate, Yes,9800000,24200000,20,382,12400000,8200000,29400000,5000000, Rejected'
b'6,0, Graduate, Yes,4800000,13500000,10,319,6800000,8300000,13700000,5100000, Rejected'
b'7,5, Graduate, No,8700000,33000000,4,678,22500000,14800000,29200000,4300000, Approved'
b'8,2, Graduate, Yes,5700000,15000000,20,382,13200000,5700000,11800000,6000000, Rejected'
b'9,0, Graduate, Yes,800000,2200000,20,782,1300000,800000,2800000,600000

KeyboardInterrupt: 