In [3]:
# model.py

import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Single source of truth for the synthetic features, listed in column order.
# Each entry is (column name, scale, offset); a column is generated as
# uniform[0, 1) * scale + offset.
FEATURE_SPECS = [
    ('收入中位数', 10, 0),        # median income: 0 to 10
    ('房龄中位数', 50, 0),        # median house age: 0 to 50
    ('每户平均房间数', 10, 0),    # average rooms per household: 0 to 10
    ('每户平均卧室数', 5, 0),     # average bedrooms per household: 0 to 5
    ('人口数量', 5000, 0),        # population: 0 to 5000
    ('每户平均居住人数', 10, 0),  # average occupants per household: 0 to 10
    ('纬度', 10, 32),             # latitude: 32 to 42
    ('经度', 10, -120),           # longitude: -120 to -110
]

# File the fitted model is serialized to.
MODEL_PATH = 'california_model.pkl'


def make_synthetic_housing_data(n_samples=1000, seed=42):
    """Generate a synthetic housing dataset.

    Features are independent uniform draws described by FEATURE_SPECS. The
    target is a linear function of two of them plus Gaussian noise:
    ``3 * income + 0.5 * house_age + 2 * N(0, 1)``.

    Args:
        n_samples: Number of rows to generate.
        seed: Value passed to ``np.random.seed``. Note that this reseeds
            NumPy's *global* legacy generator as a side effect.

    Returns:
        (X, y): a DataFrame with one column per FEATURE_SPECS entry (in that
        order) and a Series holding the simulated price.
    """
    np.random.seed(seed)
    # The dict comprehension draws the columns in FEATURE_SPECS order, so the
    # random stream is consumed in a fixed, reproducible sequence.
    X = pd.DataFrame({
        name: np.random.rand(n_samples) * scale + offset
        for name, scale, offset in FEATURE_SPECS
    })
    # Simulated price; the noise term is drawn after all feature columns.
    y = X['收入中位数'] * 3 + X['房龄中位数'] * 0.5 + np.random.randn(n_samples) * 2
    return X, y


# The guard is true both when run as a script and inside a notebook kernel,
# while letting the helper above be imported without retraining.
if __name__ == "__main__":
    # Generate the simulated data.
    data_size = 1000
    X, y = make_synthetic_housing_data(data_size, seed=42)

    # Split into train/test sets (70% / 30%).
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Fit the model.
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate on the held-out set. The injected noise has standard deviation
    # 2 (variance 4), so an MSE close to 4 is the best that can be expected.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"模型均方误差: {mse}")

    # Persist the fitted model to disk.
    joblib.dump(model, MODEL_PATH)


模型均方误差: 4.668860070308578


['california_model.pkl']

In [4]:
# app.py

import streamlit as st
import pandas as pd
import numpy as np
import joblib

# Path of the model artifact written by the training script (model.py).
MODEL_PATH = 'california_model.pkl'


@st.cache_resource
def load_model(path=MODEL_PATH):
    """Load the fitted regression model from ``path``.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; ``st.cache_resource`` keeps one loaded instance so the file
    is not unpickled again on each rerun.

    NOTE: ``joblib.load`` unpickles the file, which can execute arbitrary
    code. Only load model files that come from a trusted source.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
    """
    return joblib.load(path)


# App title.
st.title("模拟房价预测")

# Load the model; stop with a readable message instead of a traceback when
# the training script has not produced the file yet.
try:
    model = load_model()
except FileNotFoundError:
    st.error(f"未找到模型文件 '{MODEL_PATH}'，请先运行 model.py 训练并保存模型。")
    st.stop()

# User input section.
st.header("输入房屋特征:")

# Positional arguments of number_input are: label, min_value, max_value, value.
MedInc = st.number_input('收入中位数', 0.0, 10.0, 5.0)
HouseAge = st.number_input('房龄中位数', 0.0, 50.0, 25.0)
AveRooms = st.number_input('每户平均房间数', 0.0, 10.0, 5.0)
AveBedrms = st.number_input('每户平均卧室数', 0.0, 5.0, 2.5)
Population = st.number_input('人口数量', 0.0, 5000.0, 2500.0)
AveOccup = st.number_input('每户平均居住人数', 0.0, 10.0, 5.0)
Latitude = st.number_input('纬度', 32.0, 42.0, 37.0)
Longitude = st.number_input('经度', -120.0, -110.0, -115.0)

# Build a one-row DataFrame, pairing every value with its column name so a
# value can never be attached to the wrong feature. The column names and
# their order must match the columns the model was trained on.
input_df = pd.DataFrame([{
    '收入中位数': MedInc,
    '房龄中位数': HouseAge,
    '每户平均房间数': AveRooms,
    '每户平均卧室数': AveBedrms,
    '人口数量': Population,
    '每户平均居住人数': AveOccup,
    '纬度': Latitude,
    '经度': Longitude,
}])

# Run the prediction.
prediction = model.predict(input_df)

# Show the result.
# NOTE(review): the factor 100000 assumes the model output is expressed in
# units of $100,000; the training script builds a synthetic target with no
# stated unit, so confirm this is the intended scaling.
st.subheader('预测的房价（单位：美元）:')
st.write(f"${prediction[0]*100000:.2f}")


2024-08-26 15:28:51.945 
  command:

    streamlit run C:\ProgramData\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
