In [48]:
import pandas as pd
import numpy as np
import seaborn as sns
from pathlib import Path as P
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['KaiTi', 'SimHei', 'FangSong']  # CJK-capable fonts in priority order: KaiTi first, then SimHei, then FangSong
plt.rcParams['font.size'] = 12  # default font size
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly on axes

import plotly.express as px
from io import StringIO

In [49]:
data="""
牛	牛犊	羊	羊羔	标准羊单位（只）
28	12	409	380	993
20	20	300	500	980
20	12	447	320	923

牛	牛犊	马	马驹	标准羊单位（只）
76	39	17	5	690
70	70	0	0	630
70	60	0	0	600

牛	牛犊	骆驼	马	马驹	羊	羊羔	标准羊单位（只）
87	31		178	56	320	236	2407
75			178		450		1968
50	25	25	178		490		2083

牛	牛犊	羊	羊羔	标准羊单位（只）
50		120		420
50		120		420
52		120		432
"""

# The tables in the literal are separated by a blank line: split on it and
# trim the surrounding whitespace of each resulting chunk.
data = [table.strip() for table in data.split('\n\n')]

In [50]:
# Parse every tab-separated text chunk into its own DataFrame.
df_list = [
    pd.read_csv(StringIO(table_text), sep='\t')
    for table_text in data
]


In [51]:
# Inspect the first parsed table.
df_list[0]

Unnamed: 0,牛,牛犊,羊,羊羔,标准羊单位（只）
0,28,12,409,380,993
1,20,20,300,500,980
2,20,12,447,320,923


In [52]:
# Inspect the second parsed table.
df_list[1]

Unnamed: 0,牛,牛犊,马,马驹,标准羊单位（只）
0,76,39,17,5,690
1,70,70,0,0,630
2,70,60,0,0,600


In [53]:
# Blank cells in the source text were parsed as NaN; treat a missing
# head-count as zero animals. `np.NaN` was removed in NumPy 2.0, so use
# `fillna` and reassign instead of replacing in place.
df_list[2] = df_list[2].fillna(0)
df_list[2]

Unnamed: 0,牛,牛犊,骆驼,马,马驹,羊,羊羔,标准羊单位（只）
0,87,31.0,0.0,178,56.0,320,236.0,2407
1,75,0.0,0.0,178,0.0,450,0.0,1968
2,50,25.0,25.0,178,0.0,490,0.0,2083


In [54]:
# Blank cells in the source text were parsed as NaN; treat a missing
# head-count as zero animals. `np.NaN` was removed in NumPy 2.0, so use
# `fillna` and reassign instead of replacing in place.
df_list[3] = df_list[3].fillna(0)
df_list[3]

Unnamed: 0,牛,牛犊,羊,羊羔,标准羊单位（只）
0,50,0.0,120,0.0,420
1,50,0.0,120,0.0,420
2,52,0.0,120,0.0,432


In [88]:
# Stack all tables into one frame with a fresh 0..n-1 row index. Columns that
# are absent from a given table come out as NaN after the concat and are
# filled with 0 (no animals of that kind). Columns are then sorted by label.
# `np.NaN` was removed in NumPy 2.0, hence `fillna(0)`.
df = (
    pd.concat(df_list, ignore_index=True)
    .fillna(0)
    .sort_index(axis=1)
)
df

Unnamed: 0,标准羊单位（只）,牛,牛犊,羊,羊羔,马,马驹,骆驼
0,993,28,12.0,409.0,380.0,0.0,0.0,0.0
1,980,20,20.0,300.0,500.0,0.0,0.0,0.0
2,923,20,12.0,447.0,320.0,0.0,0.0,0.0
3,690,76,39.0,0.0,0.0,17.0,5.0,0.0
4,630,70,70.0,0.0,0.0,0.0,0.0,0.0
5,600,70,60.0,0.0,0.0,0.0,0.0,0.0
6,2407,87,31.0,320.0,236.0,178.0,56.0,0.0
7,1968,75,0.0,450.0,0.0,178.0,0.0,0.0
8,2083,50,25.0,490.0,0.0,178.0,0.0,25.0
9,420,50,0.0,120.0,0.0,0.0,0.0,0.0


In [91]:
# Copy the standard-sheep-unit total into a target column 'Y' appended after
# the head-count columns, then remove the original column.
# (A disabled variant used Y = total - sheep and also dropped the sheep column.)
df = df.assign(Y=df['标准羊单位（只）']).drop(columns=['标准羊单位（只）'])
df

Unnamed: 0,牛,牛犊,羊,羊羔,马,马驹,骆驼,Y
0,28,12.0,409.0,380.0,0.0,0.0,0.0,993
1,20,20.0,300.0,500.0,0.0,0.0,0.0,980
2,20,12.0,447.0,320.0,0.0,0.0,0.0,923
3,76,39.0,0.0,0.0,17.0,5.0,0.0,690
4,70,70.0,0.0,0.0,0.0,0.0,0.0,630
5,70,60.0,0.0,0.0,0.0,0.0,0.0,600
6,87,31.0,320.0,236.0,178.0,56.0,0.0,2407
7,75,0.0,450.0,0.0,178.0,0.0,0.0,1968
8,50,25.0,490.0,0.0,178.0,0.0,25.0,2083
9,50,0.0,120.0,0.0,0.0,0.0,0.0,420


In [92]:
# Save the combined table as a CSV file (relative path), omitting the row index.
df.to_csv('单位羊和其他牲畜.csv', index=False)

In [93]:
# Show the column labels of the combined table.
df.columns

Index(['牛', '牛犊', '羊', '羊羔', '马', '马驹', '骆驼', 'Y'], dtype='object')

In [94]:
# Feature column names: every column except the last one (the target 'Y').
X_list = df.columns[:-1].tolist()
X_list

['牛', '牛犊', '羊', '羊羔', '马', '马驹', '骆驼']

In [95]:
# df=df.sort_values(X_list)
X=df[X_list].values
y=df['Y'].values
# numVars=[
#     0,1,2,3,6,8
# ]

# X=X[numVars, :]

# y=y[numVars]

X.shape,y.shape

((12, 7), (12,))

In [96]:
# Display the design matrix and the target vector.
X,y

(array([[ 28.,  12., 409., 380.,   0.,   0.,   0.],
        [ 20.,  20., 300., 500.,   0.,   0.,   0.],
        [ 20.,  12., 447., 320.,   0.,   0.,   0.],
        [ 76.,  39.,   0.,   0.,  17.,   5.,   0.],
        [ 70.,  70.,   0.,   0.,   0.,   0.,   0.],
        [ 70.,  60.,   0.,   0.,   0.,   0.,   0.],
        [ 87.,  31., 320., 236., 178.,  56.,   0.],
        [ 75.,   0., 450.,   0., 178.,   0.,   0.],
        [ 50.,  25., 490.,   0., 178.,   0.,  25.],
        [ 50.,   0., 120.,   0.,   0.,   0.,   0.],
        [ 50.,   0., 120.,   0.,   0.,   0.,   0.],
        [ 52.,   0., 120.,   0.,   0.,   0.,   0.]]),
 array([ 993,  980,  923,  690,  630,  600, 2407, 1968, 2083,  420,  420,
         432], dtype=int64))

In [97]:
# Least-squares solve of X @ sol ≈ y. Keep the first two results (solution
# and residual) and discard the remaining ones returned by lstsq.
sol, residual, *_ = np.linalg.lstsq(X, y, rcond=None)


In [98]:
# Show the residual returned by the least-squares solve.
residual

array([1.1039004e-25])

In [99]:
# Show the fitted coefficient for each feature column.
sol

array([6., 3., 1., 1., 6., 3., 6.])

In [100]:
# Compare the values reconstructed from the fitted coefficients with the
# recorded targets.
X @ sol, y

(array([ 993.,  980.,  923.,  690.,  630.,  600., 2407., 1968., 2083.,
         420.,  420.,  432.]),
 array([ 993,  980,  923,  690,  630,  600, 2407, 1968, 2083,  420,  420,
         432], dtype=int64))

In [103]:
# Label each fitted coefficient with its livestock column name and present
# the result as a one-column DataFrame. (The original cell contained a stray
# numeric token that split the expression and made it a syntax error.)
df_res = pd.Series(sol, index=X_list).to_frame()
df_res

Unnamed: 0,0
牛,6.0
牛犊,3.0
羊,1.0
羊羔,1.0
马,6.0
马驹,3.0
骆驼,6.0


In [102]:
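# In China, sheep are conventionally used as the standard livestock unit; the usual conversion factors to standard sheep are: cattle 5, horse 6, donkey 3, mule 6, camel 8, pig 2, sheep and goat 1.
# Note: the coefficients fitted above (cattle 6, camel 6) differ from these reference values for cattle and camel.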